diff -Nur linux-4.2/Documentation/Makefile linux-4.2-pck/Documentation/Makefile
--- linux-4.2/Documentation/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/Documentation/Makefile	2015-09-08 00:48:22.208364829 -0300
@@ -1,4 +1,4 @@
 subdir-y := accounting auxdisplay blackfin connector \
-	filesystems filesystems ia64 laptops mic misc-devices \
+	filesystems filesystems ia64 kdbus laptops mic misc-devices \
 	networking pcmcia prctl ptp spi timers vDSO video4linux \
 	watchdog
diff -Nur linux-4.2/Documentation/ioctl/ioctl-number.txt linux-4.2-pck/Documentation/ioctl/ioctl-number.txt
--- linux-4.2/Documentation/ioctl/ioctl-number.txt	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/Documentation/ioctl/ioctl-number.txt	2015-09-08 00:48:22.208364829 -0300
@@ -292,6 +292,7 @@
 0x92	00-0F	drivers/usb/mon/mon_bin.c
 0x93	60-7F	linux/auto_fs.h
 0x94	all	fs/btrfs/ioctl.h
+0x95	all	uapi/linux/kdbus.h	kdbus IPC driver
 0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
 0x99	00-0F				537-Addinboard driver
 					<mailto:buk@buks.ipn.de>
diff -Nur linux-4.2/Documentation/kdbus/.gitignore linux-4.2-pck/Documentation/kdbus/.gitignore
--- linux-4.2/Documentation/kdbus/.gitignore	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/.gitignore	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,2 @@
+*.7
+*.html
diff -Nur linux-4.2/Documentation/kdbus/Makefile linux-4.2-pck/Documentation/kdbus/Makefile
--- linux-4.2/Documentation/kdbus/Makefile	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/Makefile	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,44 @@
+DOCS :=	\
+	kdbus.xml		\
+	kdbus.bus.xml		\
+	kdbus.connection.xml	\
+	kdbus.endpoint.xml	\
+	kdbus.fs.xml		\
+	kdbus.item.xml		\
+	kdbus.match.xml		\
+	kdbus.message.xml	\
+	kdbus.name.xml		\
+	kdbus.policy.xml	\
+	kdbus.pool.xml
+
+XMLFILES := $(addprefix $(obj)/,$(DOCS))
+MANFILES := $(patsubst %.xml, %.7, $(XMLFILES))
+HTMLFILES := $(patsubst %.xml, %.html, $(XMLFILES))
+
+XMLTO_ARGS := -m $(srctree)/$(src)/stylesheet.xsl --skip-validation
+
+quiet_cmd_db2man = MAN     $@
+      cmd_db2man = xmlto man $(XMLTO_ARGS) -o $(obj) $<
+%.7: %.xml
+	@(which xmlto > /dev/null 2>&1) || \
+	 (echo "*** You need to install xmlto ***"; \
+	  exit 1)
+	$(call cmd,db2man)
+
+quiet_cmd_db2html = HTML    $@
+      cmd_db2html = xmlto html-nochunks $(XMLTO_ARGS) -o $(obj) $<
+%.html: %.xml
+	@(which xmlto > /dev/null 2>&1) || \
+	 (echo "*** You need to install xmlto ***"; \
+	  exit 1)
+	$(call cmd,db2html)
+
+mandocs: $(MANFILES)
+
+htmldocs: $(HTMLFILES)
+
+clean-files := $(MANFILES) $(HTMLFILES)
+
+# we don't support other %docs targets right now
+%docs:
+	@true
diff -Nur linux-4.2/Documentation/kdbus/kdbus.bus.xml linux-4.2-pck/Documentation/kdbus/kdbus.bus.xml
--- linux-4.2/Documentation/kdbus/kdbus.bus.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.bus.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,344 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.bus">
+
+  <refentryinfo>
+    <title>kdbus.bus</title>
+    <productname>kdbus.bus</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.bus</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.bus</refname>
+    <refpurpose>kdbus bus</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      A bus is a resource that is shared between connections in order to
+      transmit messages (see
+      <citerefentry>
+        <refentrytitle>kdbus.message</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>).
+      Each bus is independent, and operations on the bus will not have any
+      effect on other buses. A bus is a management entity that controls the
+      addresses of its connections, their policies and message transactions
+      performed via this bus.
+    </para>
+    <para>
+      Each bus is bound to the mount instance it was created on. It has a
+      custom name that is unique across all buses of a domain. In
+      <citerefentry>
+        <refentrytitle>kdbus.fs</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      a bus is presented as a directory. No operations can be performed on
+      the bus itself; instead you need to perform the operations on an endpoint
+      associated with the bus. Endpoints are accessible as files underneath the
+      bus directory. A default endpoint called <constant>bus</constant> is
+      provided on each bus.
+    </para>
+    <para>
+      Bus names may be chosen freely except for one restriction: the name must
+      be prefixed with the numeric effective UID of the creator and a dash. This
+      is required to avoid namespace clashes between different users. When
+      creating a bus, the name that is passed in must be properly formatted, or
+      the kernel will refuse creation of the bus. Example:
+      <literal>1047-foobar</literal> is an acceptable name for a bus
+      registered by a user with UID 1047. However,
+      <literal>1024-foobar</literal> is not, and neither is
+      <literal>foobar</literal>. The UID must be provided in the
+      user-namespace of the bus owner.
+    </para>
+    <para>
+      To create a new bus, you need to open the control file of a domain and
+      employ the <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl. The control
+      file descriptor that was used to issue
+      <constant>KDBUS_CMD_BUS_MAKE</constant> must not previously have been
+      used for any other control-ioctl and must be kept open for the entire
+      lifetime of the created bus. Closing it will immediately clean up the
+      entire bus and all its associated resources and endpoints. Every control
+      file descriptor can only be used to create a single new bus; from that
+      point on, it is not used for any further communication until the final
+      <citerefentry>
+        <refentrytitle>close</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      .
+    </para>
+    <para>
+      Each bus will generate a random, 128-bit UUID upon creation. This UUID
+      will be returned to creators of connections through
+      <varname>kdbus_cmd_hello.id128</varname> and can be used to uniquely
+      identify buses, even across different machines or containers. The UUID
+      will have its variant bits set to <literal>DCE</literal>, and denote
+      version 4 (random). For more details on UUIDs, see <ulink
+      url="https://en.wikipedia.org/wiki/Universally_unique_identifier">
+      the Wikipedia article on UUIDs</ulink>.
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Creating buses</title>
+    <para>
+      To create a new bus, the <constant>KDBUS_CMD_BUS_MAKE</constant>
+      command is used. It takes a <type>struct kdbus_cmd</type> argument.
+    </para>
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>The flags for creation.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_MAKE_ACCESS_GROUP</constant></term>
+              <listitem>
+                <para>Make the bus file group-accessible.</para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_MAKE_ACCESS_WORLD</constant></term>
+              <listitem>
+                <para>Make the bus file world-accessible.</para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following items (see
+            <citerefentry>
+              <refentrytitle>kdbus.item</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>)
+            are expected for <constant>KDBUS_CMD_BUS_MAKE</constant>.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+              <listitem>
+                <para>
+                  Contains a null-terminated string that identifies the
+                  bus. The name must be unique across the kdbus domain and
+                  must start with the effective UID of the caller, followed by
+                  a '<literal>-</literal>' (dash). This item is mandatory.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+              <listitem>
+                <para>
+                  Bus-wide bloom parameters passed in a
+                  <type>struct kdbus_bloom_parameter</type>. These settings are
+                  copied back to new connections verbatim. This item is
+                  mandatory. See
+                  <citerefentry>
+                    <refentrytitle>kdbus.item</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>
+                  for a more detailed description of this item.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+              <listitem>
+                <para>
+                  An optional item that contains a set of attach flags that are
+                  returned to connections when they query the bus creator
+                  metadata. If not set, no metadata is returned.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Unrecognized items are rejected, and the ioctl will fail with
+      <varname>errno</varname> set to <constant>EINVAL</constant>.
+    </para>
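+
+    <para>
+      As an illustration only, the following sketch creates a bus called
+      <literal>&lt;uid&gt;-example</literal> with the command described above.
+      The control file path, the <literal>&lt;linux/kdbus.h&gt;</literal>
+      header name, the bloom parameters and the helper name are assumptions
+      made for this example, and most error handling is omitted.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: create a bus named "<uid>-example". The returned
+ * file descriptor must stay open for the whole lifetime of the bus. */
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+#define ALIGN8(l) (((l) + 7) & ~7ULL)
+
+int make_example_bus(void)
+{
+  struct {
+    struct kdbus_cmd cmd;
+    struct {                      /* KDBUS_ITEM_BLOOM_PARAMETER (mandatory) */
+      __u64 size;
+      __u64 type;
+      struct kdbus_bloom_parameter bloom;
+    } bp;
+    struct {                      /* KDBUS_ITEM_MAKE_NAME (mandatory) */
+      __u64 size;
+      __u64 type;
+      char name[64];
+    } name;
+  } buf;
+  int fd;
+
+  memset(&buf, 0, sizeof(buf));
+
+  buf.bp.size = sizeof(buf.bp);
+  buf.bp.type = KDBUS_ITEM_BLOOM_PARAMETER;
+  buf.bp.bloom.size = 64;         /* illustrative bloom filter settings */
+  buf.bp.bloom.n_hash = 4;
+
+  /* The name must be "<effective uid>-<chosen name>". */
+  snprintf(buf.name.name, sizeof(buf.name.name), "%u-example",
+           (unsigned int)getuid());
+  buf.name.size = 16 + ALIGN8(strlen(buf.name.name) + 1);
+  buf.name.type = KDBUS_ITEM_MAKE_NAME;
+
+  buf.cmd.size = sizeof(buf.cmd) + buf.bp.size + buf.name.size;
+  buf.cmd.flags = KDBUS_MAKE_ACCESS_WORLD;
+
+  fd = open("/sys/fs/kdbus/control", O_RDWR | O_CLOEXEC);
+  if (fd < 0)
+    return -1;
+
+  if (ioctl(fd, KDBUS_CMD_BUS_MAKE, &buf) < 0) {
+    close(fd);
+    return -1;
+  }
+
+  return fd;                      /* closing it destroys the bus again */
+}
+    ]]></programlisting>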
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_BUS_MAKE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EBADMSG</constant></term>
+          <listitem><para>
+            A mandatory item is missing.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The flags supplied in <type>struct kdbus_cmd</type>
+            are invalid or the supplied name does not start with the current
+            UID and a '<literal>-</literal>' (dash).
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EEXIST</constant></term>
+          <listitem><para>
+            A bus of that name already exists.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ESHUTDOWN</constant></term>
+          <listitem><para>
+            The kdbus mount instance for the bus was already shut down.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMFILE</constant></term>
+          <listitem><para>
+            The maximum number of buses for the current user is exhausted.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.connection.xml linux-4.2-pck/Documentation/kdbus/kdbus.connection.xml
--- linux-4.2/Documentation/kdbus/kdbus.connection.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.connection.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,1244 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.connection">
+
+  <refentryinfo>
+    <title>kdbus.connection</title>
+    <productname>kdbus.connection</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.connection</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.connection</refname>
+    <refpurpose>kdbus connection</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Connections are identified by their <emphasis>connection ID</emphasis>,
+      internally implemented as a <type>uint64_t</type> counter.
+      On every newly created bus, the ID counter starts at
+      <constant>1</constant>, and each new connection increments it by
+      <constant>1</constant>. IDs are never reused.
+    </para>
+    <para>
+      In higher-level tools, the user-visible representation of a connection is
+      defined by the D-Bus protocol specification as
+      <constant>":1.&lt;ID&gt;"</constant>.
+    </para>
+    <para>
+      Messages with a specific <type>uint64_t</type> destination ID are
+      directly delivered to the connection with the corresponding ID. Signal
+      messages (see
+      <citerefentry>
+        <refentrytitle>kdbus.message</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>)
+      may be addressed to the special destination ID
+      <constant>KDBUS_DST_ID_BROADCAST</constant> (~0ULL) and will then
+      potentially be delivered to all currently active connections on the bus.
+      However, in order to receive any signal messages, clients must subscribe
+      to them by installing a match (see
+      <citerefentry>
+        <refentrytitle>kdbus.match</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>).
+    </para>
+    <para>
+      Messages synthesized and sent directly by the kernel will carry the
+      special source ID <constant>KDBUS_SRC_ID_KERNEL</constant> (0).
+    </para>
+    <para>
+      In addition to the unique <type>uint64_t</type> connection ID,
+      established connections can request the ownership of
+      <emphasis>well-known names</emphasis>, under which they can be found and
+      addressed by other bus clients. A well-known name is associated with one
+      and only one connection at a time. See
+      <citerefentry>
+        <refentrytitle>kdbus.name</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      on name acquisition, the name registry, and the validity of names.
+    </para>
+    <para>
+      Messages can specify the special destination ID
+      <constant>KDBUS_DST_ID_NAME</constant> (0) and carry a well-known name
+      in the message data. Such a message is delivered to the destination
+      connection which owns that well-known name.
+    </para>
+
+    <programlisting><![CDATA[
+  +-------------------------------------------------------------------------+
+  | +---------------+     +---------------------------+                     |
+  | | Connection    |     | Message                   | -----------------+  |
+  | | :1.22         | --> | src: 22                   |                  |  |
+  | |               |     | dst: 25                   |                  |  |
+  | |               |     |                           |                  |  |
+  | |               |     |                           |                  |  |
+  | |               |     +---------------------------+                  |  |
+  | |               |                                                    |  |
+  | |               | <--------------------------------------+           |  |
+  | +---------------+                                        |           |  |
+  |                                                          |           |  |
+  | +---------------+     +---------------------------+      |           |  |
+  | | Connection    |     | Message                   | -----+           |  |
+  | | :1.25         | --> | src: 25                   |                  |  |
+  | |               |     | dst: 0xffffffffffffffff   | -------------+   |  |
+  | |               |     |  (KDBUS_DST_ID_BROADCAST) |              |   |  |
+  | |               |     |                           | ---------+   |   |  |
+  | |               |     +---------------------------+          |   |   |  |
+  | |               |                                            |   |   |  |
+  | |               | <--------------------------------------------------+  |
+  | +---------------+                                            |   |      |
+  |                                                              |   |      |
+  | +---------------+     +---------------------------+          |   |      |
+  | | Connection    |     | Message                   | --+      |   |      |
+  | | :1.55         | --> | src: 55                   |   |      |   |      |
+  | |               |     | dst: 0 / org.foo.bar      |   |      |   |      |
+  | |               |     |                           |   |      |   |      |
+  | |               |     |                           |   |      |   |      |
+  | |               |     +---------------------------+   |      |   |      |
+  | |               |                                     |      |   |      |
+  | |               | <------------------------------------------+   |      |
+  | +---------------+                                     |          |      |
+  |                                                       |          |      |
+  | +---------------+                                     |          |      |
+  | | Connection    |                                     |          |      |
+  | | :1.81         |                                     |          |      |
+  | | org.foo.bar   |                                     |          |      |
+  | |               |                                     |          |      |
+  | |               |                                     |          |      |
+  | |               | <-----------------------------------+          |      |
+  | |               |                                                |      |
+  | |               | <----------------------------------------------+      |
+  | +---------------+                                                       |
+  +-------------------------------------------------------------------------+
+    ]]></programlisting>
+  </refsect1>
+
+  <refsect1>
+    <title>Privileged connections</title>
+    <para>
+      A connection is considered <emphasis>privileged</emphasis> if the user
+      it was created by is the same one that created the bus, or if the creating
+      task had <constant>CAP_IPC_OWNER</constant> set when it called
+      <constant>KDBUS_CMD_HELLO</constant> (see below).
+    </para>
+    <para>
+      Privileged connections have permission to employ certain restricted
+      functions and commands, which are explained below and in other kdbus
+      man-pages.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Activator and policy holder connection</title>
+    <para>
+      An <emphasis>activator</emphasis> connection is a placeholder for a
+      <emphasis>well-known name</emphasis>. Messages sent to such a connection
+      can be used to start an implementer connection, which will then get all
+      the messages from the activator copied over. An activator connection
+      cannot be used to send any message.
+    </para>
+    <para>
+      A <emphasis>policy holder</emphasis> connection only installs a policy
+      for one or more names. These policy entries are kept active as long as
+      the connection is alive, and are removed once it terminates. Such a
+      policy connection type can be used to deploy restrictions for names that
+      are not yet active on the bus. A policy holder connection cannot be used
+      to send any message.
+    </para>
+    <para>
+      The creation of activator or policy holder connections is restricted to
+      privileged users on the bus (see above).
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Monitor connections</title>
+    <para>
+      Monitors are eavesdropping connections that receive all the traffic on the
+      bus, but are invisible to other connections. Such connections have all the
+      properties of any other, regular connection, except for the following
+      details:
+    </para>
+
+    <itemizedlist>
+      <listitem><para>
+        They will get every message sent over the bus, both unicasts and
+        broadcasts.
+      </para></listitem>
+
+      <listitem><para>
+        Installing matches for signal messages is neither necessary
+        nor allowed.
+      </para></listitem>
+
+      <listitem><para>
+        They cannot send messages or be directly addressed as receiver.
+      </para></listitem>
+
+      <listitem><para>
+        They cannot own well-known names. Therefore, they also can't operate as
+        activators.
+      </para></listitem>
+
+      <listitem><para>
+        Their creation and destruction will not cause
+        <constant>KDBUS_ITEM_ID_{ADD,REMOVE}</constant> notifications (see
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>).
+      </para></listitem>
+
+      <listitem><para>
+        They are not listed with their unique name in name registry dumps
+        (see <constant>KDBUS_CMD_NAME_LIST</constant> in
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>), so other connections cannot detect the presence of
+        a monitor.
+      </para></listitem>
+    </itemizedlist>
+    <para>
+      The creation of monitor connections is restricted to privileged users on
+      the bus (see above).
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Creating connections</title>
+    <para>
+      A connection to a bus is created by opening an endpoint file (see
+      <citerefentry>
+        <refentrytitle>kdbus.endpoint</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>)
+      of a bus and becoming an active client with the
+      <constant>KDBUS_CMD_HELLO</constant> ioctl. Every connection has a unique
+      identifier on the bus and can address messages to every other connection
+      on the same bus by using the peer's connection ID as the destination.
+    </para>
+    <para>
+      The <constant>KDBUS_CMD_HELLO</constant> ioctl takes a <type>struct
+      kdbus_cmd_hello</type> as argument.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_hello {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __u64 attach_flags_send;
+  __u64 attach_flags_recv;
+  __u64 bus_flags;
+  __u64 id;
+  __u64 pool_size;
+  __u64 offset;
+  __u8 id128[16];
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem>
+          <para>Flags to apply to this connection</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_HELLO_ACCEPT_FD</constant></term>
+              <listitem>
+                <para>
+                  When this flag is set, the connection can be sent file
+                  descriptors as message payload of unicast messages. If it's
+                  not set, an attempt to send file descriptors will result in
+                  <constant>-ECOMM</constant> on the sender's side.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_HELLO_ACTIVATOR</constant></term>
+              <listitem>
+                <para>
+                  Make this connection an activator (see above). With this bit
+                  set, an item of type <constant>KDBUS_ITEM_NAME</constant> has
+                  to be attached. This item describes the well-known name this
+                  connection should be an activator for.
+                  A connection cannot be an activator and a policy holder at
+                  the same time, so this bit is not allowed together with
+                  <constant>KDBUS_HELLO_POLICY_HOLDER</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_HELLO_POLICY_HOLDER</constant></term>
+              <listitem>
+                <para>
+                  Make this connection a policy holder (see above). With this
+                  bit set, an item of type <constant>KDBUS_ITEM_NAME</constant>
+                  has to be attached. This item describes the well-known name
+                  this connection should hold a policy for.
+                  A connection cannot be an activator and a policy holder at
+                  the same time, so this bit is not allowed together with
+                  <constant>KDBUS_HELLO_ACTIVATOR</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_HELLO_MONITOR</constant></term>
+              <listitem>
+                <para>
+                  Make this connection a monitor connection (see above).
+                </para>
+                <para>
+                  This flag can only be set by privileged bus connections. See
+                  below for more information.
+                  A connection cannot be a monitor and an activator or a policy
+                  holder at the same time, so this bit is not allowed
+                  together with <constant>KDBUS_HELLO_ACTIVATOR</constant> or
+                  <constant>KDBUS_HELLO_POLICY_HOLDER</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>attach_flags_send</varname></term>
+        <listitem><para>
+          Set the bits for metadata this connection permits to be sent to the
+          receiving peer. Only metadata items that are both allowed to be sent
+          by the sender and that are requested by the receiver will be attached
+          to the message.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>attach_flags_recv</varname></term>
+        <listitem><para>
+          Request the attachment of metadata for each message received by this
+          connection. See
+          <citerefentry>
+            <refentrytitle>kdbus</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for information about metadata, and
+          <citerefentry>
+            <refentrytitle>kdbus.item</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          regarding items in general.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>bus_flags</varname></term>
+        <listitem><para>
+          Upon successful completion of the ioctl, this member will contain the
+          flags of the bus it connected to.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+          Upon successful completion of the command, this member will contain
+          the numerical ID of the new connection.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>pool_size</varname></term>
+        <listitem><para>
+          The size of the communication pool, in bytes. The pool can be
+          accessed by calling
+          <citerefentry>
+            <refentrytitle>mmap</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>
+          on the file descriptor that was used to issue the
+          <constant>KDBUS_CMD_HELLO</constant> ioctl.
+          The pool size of a connection must be greater than
+          <constant>0</constant> and a multiple of
+          <constant>PAGE_SIZE</constant>. See
+          <citerefentry>
+            <refentrytitle>kdbus.pool</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for more information.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>offset</varname></term>
+        <listitem><para>
+          The kernel will return the offset in the pool where returned details
+          will be stored. See below.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id128</varname></term>
+        <listitem><para>
+          Upon successful completion of the ioctl, this member will contain the
+          <emphasis>128-bit UUID</emphasis> of the connected bus.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Variable list of items containing optional additional information.
+            The following items are currently expected/valid:
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_CONN_DESCRIPTION</constant></term>
+              <listitem>
+                <para>
+                  Contains a string that describes this connection, so it can
+                  be identified later.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME</constant></term>
+              <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+              <listitem>
+                <para>
+                  For activators and policy holders only, combinations of
+                  these two items describe policy access entries. See
+                  <citerefentry>
+                    <refentrytitle>kdbus.policy</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>
+                  for further details.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_CREDS</constant></term>
+              <term><constant>KDBUS_ITEM_PIDS</constant></term>
+              <term><constant>KDBUS_ITEM_SECLABEL</constant></term>
+              <listitem>
+                <para>
+                  Privileged bus users may submit these types in order to
+                  create connections with faked credentials. This information
+                  will be returned when peer information is queried by
+                  <constant>KDBUS_CMD_CONN_INFO</constant>. See below for more
+                  information on retrieving information on connections.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      At the offset returned in the <varname>offset</varname> field of
+      <type>struct kdbus_cmd_hello</type>, the kernel will store items
+      of the following types:
+    </para>
+
+    <variablelist>
+      <varlistentry>
+        <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+        <listitem>
+          <para>
+            Bloom filter parameter as defined by the bus creator.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The offset in the pool has to be freed with the
+      <constant>KDBUS_CMD_FREE</constant> ioctl. See
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for further information.
+    </para>
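+
+    <para>
+      As an illustration only, the following sketch connects to an existing
+      bus and maps the connection's pool. The endpoint path, the pool size,
+      the attach flags and the helper name are assumptions made for this
+      example, and most error handling is omitted.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: become an active client on a bus endpoint. */
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/kdbus.h>
+
+#define POOL_SIZE (16UL * 1024UL * 1024UL)  /* must be a multiple of PAGE_SIZE */
+
+int connect_to_bus(const char *ep_path, void **pool)
+{
+  struct kdbus_cmd_hello hello;
+  int fd;
+
+  fd = open(ep_path, O_RDWR | O_CLOEXEC);   /* e.g. /sys/fs/kdbus/<bus>/bus */
+  if (fd < 0)
+    return -1;
+
+  memset(&hello, 0, sizeof(hello));
+  hello.size = sizeof(hello);
+  hello.flags = KDBUS_HELLO_ACCEPT_FD;
+  hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+  hello.pool_size = POOL_SIZE;
+
+  if (ioctl(fd, KDBUS_CMD_HELLO, &hello) < 0) {
+    close(fd);
+    return -1;
+  }
+
+  /* hello.id and hello.id128 now carry the connection ID and bus UUID;
+   * hello.offset points at items (e.g. the bloom parameter) in the pool
+   * and should eventually be released with KDBUS_CMD_FREE. */
+
+  *pool = mmap(NULL, POOL_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+  if (*pool == MAP_FAILED) {
+    close(fd);
+    return -1;
+  }
+
+  return fd;
+}
+    ]]></programlisting>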
+  </refsect1>
+
+  <refsect1>
+    <title>Retrieving information on a connection</title>
+    <para>
+      The <constant>KDBUS_CMD_CONN_INFO</constant> ioctl can be used to
+      retrieve credentials and properties of the initial creator of a
+      connection. This ioctl uses the following struct.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_info {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __u64 id;
+  __u64 attach_flags;
+  __u64 offset;
+  __u64 info_size;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Currently, no flags are supported.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+          and the <varname>flags</varname> field is set to
+          <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+          The numerical ID of the connection for which information is to be
+          retrieved. If set to a non-zero value, the
+          <constant>KDBUS_ITEM_OWNED_NAME</constant> item is ignored.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>attach_flags</varname></term>
+        <listitem><para>
+          Specifies which metadata items should be attached to the answer. See
+          <citerefentry>
+            <refentrytitle>kdbus.message</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>offset</varname></term>
+        <listitem><para>
+          When the ioctl returns, this field will contain the offset of the
+          connection information inside the caller's pool. See
+          <citerefentry>
+            <refentrytitle>kdbus.pool</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for further information.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>info_size</varname></term>
+        <listitem><para>
+          The kernel will report the size of the returned information, so
+          applications can optionally
+          <citerefentry>
+            <refentrytitle>mmap</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>
+          specific parts of the pool. See
+          <citerefentry>
+            <refentrytitle>kdbus.pool</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for further information.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following items are expected for
+            <constant>KDBUS_CMD_CONN_INFO</constant>.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_OWNED_NAME</constant></term>
+              <listitem>
+                <para>
+                  Contains the well-known name of the connection to look up.
+                  This item is mandatory if the <varname>id</varname> field is
+                  set to 0.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      When the ioctl returns, the following struct will be stored in the
+      caller's pool at <varname>offset</varname>. The fields in this struct
+      are described below.
+    </para>
+
+    <programlisting>
+struct kdbus_info {
+  __u64 size;
+  __u64 id;
+  __u64 flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+          The connection's unique ID.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          The connection's flags as specified when it was created.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Depending on the <varname>flags</varname> field in
+            <type>struct kdbus_cmd_info</type>, items of types
+            <constant>KDBUS_ITEM_OWNED_NAME</constant> and
+            <constant>KDBUS_ITEM_CONN_DESCRIPTION</constant> may follow here.
+            <constant>KDBUS_ITEM_NEGOTIATE</constant> is also allowed.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Once the caller has finished parsing the returned buffer, it needs to
+      employ the <constant>KDBUS_CMD_FREE</constant> command on the offset in
+      order to free that part of the buffer. See
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for further information.
+    </para>
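+
+    <para>
+      As an illustration only, the following sketch queries a peer by its
+      numerical ID and releases the returned pool slice again. The attach
+      flag constants, the helper name and the use of
+      <type>struct kdbus_cmd_free</type> are assumptions made for this
+      example; see
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for the authoritative description of freeing pool slices.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: fetch information about connection `peer_id`.
+ * `conn_fd` and `pool` come from the KDBUS_CMD_HELLO step. */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+int query_peer(int conn_fd, void *pool, __u64 peer_id)
+{
+  struct kdbus_cmd_info cmd;
+  struct kdbus_cmd_free cmd_free;
+  const struct kdbus_info *info;
+
+  memset(&cmd, 0, sizeof(cmd));
+  cmd.size = sizeof(cmd);
+  cmd.id = peer_id;            /* non-zero: no KDBUS_ITEM_OWNED_NAME needed */
+  cmd.attach_flags = KDBUS_ATTACH_CREDS | KDBUS_ATTACH_PIDS;
+
+  if (ioctl(conn_fd, KDBUS_CMD_CONN_INFO, &cmd) < 0)
+    return -1;
+
+  /* The reply is stored inside the connection's pool. */
+  info = (const struct kdbus_info *)((const char *)pool + cmd.offset);
+  (void)info;                  /* walk info->items as needed */
+
+  /* Release the slice once parsing is done. */
+  memset(&cmd_free, 0, sizeof(cmd_free));
+  cmd_free.size = sizeof(cmd_free);
+  cmd_free.offset = cmd.offset;
+
+  return ioctl(conn_fd, KDBUS_CMD_FREE, &cmd_free);
+}
+    ]]></programlisting>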
+  </refsect1>
+
+  <refsect1>
+    <title>Getting information about a connection's bus creator</title>
+    <para>
+      The <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant> ioctl takes the same
+      struct as <constant>KDBUS_CMD_CONN_INFO</constant>, but is used to
+      retrieve information about the creator of the bus the connection is
+      attached to. The metadata returned by this call is collected during the
+      creation of the bus and is never altered afterwards, so it provides
+      pristine information on the task that created the bus, at the moment when
+      it did so.
+    </para>
+    <para>
+      In response to this call, a slice in the connection's pool is allocated
+      and filled with an object of type <type>struct kdbus_info</type>,
+      pointed to by the ioctl's <varname>offset</varname> field.
+    </para>
+
+    <programlisting>
+struct kdbus_info {
+  __u64 size;
+  __u64 id;
+  __u64 flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+          The bus ID.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          The bus flags as specified when it was created.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Metadata information is stored in items here. The item list
+            contains a <constant>KDBUS_ITEM_MAKE_NAME</constant> item that
+            indicates the bus name of the calling connection.
+            <constant>KDBUS_ITEM_NEGOTIATE</constant> is allowed to probe
+            for known item types.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Once the caller has finished parsing the returned buffer, it needs to
+      employ the <constant>KDBUS_CMD_FREE</constant> command on the offset in
+      order to free that part of the buffer. See
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for further information.
+    </para>
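+
+    <para>
+      As an illustration only, a query for the bus creator differs from the
+      connection query shown above mainly in the ioctl command used. The
+      attach flag constant and the helper name are assumptions made for this
+      example.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: retrieve metadata about the creator of the bus
+ * that `conn_fd` (from KDBUS_CMD_HELLO) is attached to. */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+int query_bus_creator(int conn_fd, void *pool)
+{
+  struct kdbus_cmd_info cmd;
+  const struct kdbus_info *info;
+
+  memset(&cmd, 0, sizeof(cmd));
+  cmd.size = sizeof(cmd);
+  cmd.attach_flags = KDBUS_ATTACH_CREDS;
+
+  if (ioctl(conn_fd, KDBUS_CMD_BUS_CREATOR_INFO, &cmd) < 0)
+    return -1;
+
+  info = (const struct kdbus_info *)((const char *)pool + cmd.offset);
+  (void)info;   /* parse items, then free the slice with KDBUS_CMD_FREE */
+
+  return 0;
+}
+    ]]></programlisting>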
+  </refsect1>
+
+  <refsect1>
+    <title>Updating connection details</title>
+    <para>
+      Some of a connection's details can be updated with the
+      <constant>KDBUS_CMD_CONN_UPDATE</constant> ioctl, using the file
+      descriptor that was used to create the connection. The update command
+      uses the following struct.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Currently, no flags are supported.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+          and the <varname>flags</varname> field is set to
+          <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Items to describe the connection details to be updated. The
+            following item types are supported.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+              <listitem>
+                <para>
+                  Supply a new set of metadata items that this connection
+                  permits to be sent along with messages.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant></term>
+              <listitem>
+                <para>
+                  Supply a new set of metadata items that this connection
+                  requests to be attached to each message.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME</constant></term>
+              <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+              <listitem>
+                <para>
+                  Policy holder connections may supply a new set of policy
+                  information with these items. For other connection types,
+                  <constant>EOPNOTSUPP</constant> is returned in
+                  <varname>errno</varname>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
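+
+    <para>
+      As an illustration only, the following sketch replaces the set of
+      metadata this connection requests on received messages. The particular
+      attach flag and the helper name are assumptions made for this example.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: request only timestamps on incoming messages
+ * (this replaces the previously requested set) via KDBUS_CMD_CONN_UPDATE. */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+int request_timestamps(int conn_fd)
+{
+  struct {
+    struct kdbus_cmd cmd;
+    struct {                      /* KDBUS_ITEM_ATTACH_FLAGS_RECV */
+      __u64 size;
+      __u64 type;
+      __u64 flags;
+    } item;
+  } buf;
+
+  memset(&buf, 0, sizeof(buf));
+  buf.item.size = sizeof(buf.item);
+  buf.item.type = KDBUS_ITEM_ATTACH_FLAGS_RECV;
+  buf.item.flags = KDBUS_ATTACH_TIMESTAMP;
+  buf.cmd.size = sizeof(buf);
+
+  return ioctl(conn_fd, KDBUS_CMD_CONN_UPDATE, &buf);
+}
+    ]]></programlisting>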
+  </refsect1>
+
+  <refsect1>
+    <title>Termination of connections</title>
+    <para>
+      A connection can be terminated by simply calling
+      <citerefentry>
+        <refentrytitle>close</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      on its file descriptor. All pending incoming messages will be discarded,
+      and the memory allocated by the pool will be freed.
+    </para>
+
+    <para>
+      An alternative way of closing down a connection is via the
+      <constant>KDBUS_CMD_BYEBYE</constant> ioctl. This ioctl will succeed only
+      if the message queue of the connection is empty at the time of closing;
+      otherwise, the ioctl will fail with <varname>errno</varname> set to
+      <constant>EBUSY</constant>. When this ioctl returns
+      successfully, the connection has been terminated and won't accept any new
+      messages from remote peers. This way, a connection can be terminated
+      race-free, without losing any messages. The ioctl takes an argument of
+      type <type>struct kdbus_cmd</type>.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Currently, no flags are supported.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will fail with
+          <varname>errno</varname> set to <constant>EPROTO</constant>, and
+          the <varname>flags</varname> field is set to <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following item types are supported.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
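+
+    <para>
+      As an illustration only, the following sketch shuts a connection down
+      race-free with <constant>KDBUS_CMD_BYEBYE</constant>; the helper name
+      and the handling of <constant>EBUSY</constant> are assumptions made for
+      this example.
+    </para>
+    <programlisting><![CDATA[
+/* Hypothetical example: terminate a connection without losing messages.
+ * On EBUSY, the caller still has queued messages to receive first. */
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+int disconnect(int conn_fd)
+{
+  struct kdbus_cmd cmd;
+
+  memset(&cmd, 0, sizeof(cmd));
+  cmd.size = sizeof(cmd);
+
+  if (ioctl(conn_fd, KDBUS_CMD_BYEBYE, &cmd) < 0 && errno == EBUSY)
+    return -1;      /* drain the queue, then retry KDBUS_CMD_BYEBYE */
+
+  return close(conn_fd);
+}
+    ]]></programlisting>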
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_HELLO</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EFAULT</constant></term>
+          <listitem><para>
+            The supplied pool size was 0 or not a multiple of the page size.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The flags supplied in <type>struct kdbus_cmd_hello</type>
+            are invalid.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            An illegal combination of
+            <constant>KDBUS_HELLO_MONITOR</constant>,
+            <constant>KDBUS_HELLO_ACTIVATOR</constant> and
+            <constant>KDBUS_HELLO_POLICY_HOLDER</constant> was passed in
+            <varname>flags</varname>.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            An invalid set of items was supplied.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ECONNREFUSED</constant></term>
+          <listitem><para>
+            The attach_flags_send field did not satisfy the requirements of
+            the bus.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EPERM</constant></term>
+          <listitem><para>
+            A <constant>KDBUS_ITEM_CREDS</constant> item was supplied, but the
+            current user is not privileged.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ESHUTDOWN</constant></term>
+          <listitem><para>
+            The bus you were trying to connect to has already been shut down.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMFILE</constant></term>
+          <listitem><para>
+            The maximum number of connections on the bus has been reached.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EOPNOTSUPP</constant></term>
+          <listitem><para>
+            The endpoint does not support the connection flags supplied in
+            <type>struct kdbus_cmd_hello</type>.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_BYEBYE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EALREADY</constant></term>
+          <listitem><para>
+            The connection has already been shut down.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EBUSY</constant></term>
+          <listitem><para>
+            There are still messages queued up in the connection's pool.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_CONN_INFO</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Invalid flags, or neither an ID nor a name was provided, or the
+            name is invalid.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ESRCH</constant></term>
+          <listitem><para>
+            Connection lookup by name failed.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ENXIO</constant></term>
+          <listitem><para>
+            No connection with the provided connection ID was found.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_CONN_UPDATE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal flags or items.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Wildcards submitted in policy entries, or illegal sequence
+            of policy items.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EOPNOTSUPP</constant></term>
+          <listitem><para>
+            Operation not supported by connection.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>E2BIG</constant></term>
+          <listitem><para>
+            Too many policy items attached.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.policy</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.endpoint.xml linux-4.2-pck/Documentation/kdbus/kdbus.endpoint.xml
--- linux-4.2/Documentation/kdbus/kdbus.endpoint.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.endpoint.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,429 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.endpoint">
+
+  <refentryinfo>
+    <title>kdbus.endpoint</title>
+    <productname>kdbus.endpoint</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.endpoint</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.endpoint</refname>
+    <refpurpose>kdbus endpoint</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Endpoints are entry points to a bus (see
+      <citerefentry>
+        <refentrytitle>kdbus.bus</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>).
+      Each bus provides a default
+      endpoint called 'bus'. The bus owner can additionally create custom
+      endpoints with specific names, permissions, and policy databases
+      (see below). An endpoint is presented as a file underneath the
+      directory of the parent bus.
+    </para>
+    <para>
+      To create a custom endpoint, open the default endpoint
+      (<literal>bus</literal>) and use the
+      <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> ioctl with
+      <type>struct kdbus_cmd</type>. Custom endpoints always have a policy
+      database that, by default, forbids any operation. You have to explicitly
+      install policy entries to allow any operation on this endpoint.
+    </para>
+    <para>
+      Once <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> has succeeded, the new
+      endpoint will appear in the filesystem
+      (<citerefentry>
+        <refentrytitle>kdbus.bus</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>), and the used file descriptor will manage the
+      newly created endpoint resource. It cannot be used to manage further
+      resources and must be kept open as long as the endpoint is needed. The
+      endpoint will be terminated as soon as the file descriptor is closed.
+    </para>
+    <para>
+      Endpoint names may be chosen freely except for one restriction: the name
+      must be prefixed with the numeric effective UID of the creator and a dash.
+      This is required to avoid namespace clashes between different users. When
+      creating an endpoint, the name that is passed in must be properly
+      formatted or the kernel will refuse creation of the endpoint. Example:
+      <literal>1047-my-endpoint</literal> is an acceptable name for an
+      endpoint registered by a user with UID 1047. However,
+      <literal>1024-my-endpoint</literal> is not, and neither is
+      <literal>my-endpoint</literal>. The UID must be provided in the
+      user-namespace of the bus.
+    </para>
+    <para>
+      To create connections to a bus, use <constant>KDBUS_CMD_HELLO</constant>
+      on a file descriptor returned by <function>open()</function> on an
+      endpoint node. See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for further details.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Creating custom endpoints</title>
+    <para>
+      To create a new endpoint, the
+      <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> command is used. Along with
+      the endpoint's name, which will be used to expose the endpoint in the
+      <citerefentry>
+        <refentrytitle>kdbus.fs</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>,
+      the command also optionally takes items to set up the endpoint's
+      <citerefentry>
+        <refentrytitle>kdbus.policy</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+      <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> takes a
+      <type>struct kdbus_cmd</type> argument.
+    </para>
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>The flags for creation.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_MAKE_ACCESS_GROUP</constant></term>
+              <listitem>
+                <para>Make the endpoint file group-accessible.</para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_MAKE_ACCESS_WORLD</constant></term>
+              <listitem>
+                <para>Make the endpoint file world-accessible.</para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following items are expected for
+            <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+              <listitem>
+                <para>Contains a string to identify the endpoint name.</para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME</constant></term>
+              <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+              <listitem>
+                <para>
+                  These items are used to set the policy attached to the
+                  endpoint. For more details on bus and endpoint policies, see
+                  <citerefentry>
+                    <refentrytitle>kdbus.policy</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
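+
+    <para>
+      As a rough illustration (not part of the kdbus API documentation
+      itself), the following sketch shows how such a command could be
+      assembled in C. It assumes the structs and constants described above
+      are provided by a kdbus uapi header; the helper name and the header
+      path are hypothetical.
+    </para>
+
+    <programlisting><![CDATA[
+/* Sketch: create a custom endpoint through an open default-endpoint fd. */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>   /* assumed location of the kdbus uapi header */
+
+#define KDBUS_ALIGN8(val) (((val) + 7) & ~7ULL)
+
+static int make_custom_endpoint(int ep_fd, const char *name)
+{
+    /* one KDBUS_ITEM_MAKE_NAME item: 16-byte item header plus the string */
+    uint64_t item_size = offsetof(struct kdbus_item, str) + strlen(name) + 1;
+    uint64_t cmd_size = sizeof(struct kdbus_cmd) + KDBUS_ALIGN8(item_size);
+    struct kdbus_cmd *cmd = calloc(1, cmd_size);
+    int ret;
+
+    if (!cmd)
+        return -1;
+
+    cmd->size = cmd_size;
+    cmd->items[0].size = item_size;
+    cmd->items[0].type = KDBUS_ITEM_MAKE_NAME;
+    strcpy(cmd->items[0].str, name);
+
+    /* ep_fd must refer to the bus' default endpoint ('bus') */
+    ret = ioctl(ep_fd, KDBUS_CMD_ENDPOINT_MAKE, cmd);
+    free(cmd);
+    return ret;
+}
+
+/* e.g.: make_custom_endpoint(fd, "1047-my-endpoint"); */
+    ]]></programlisting>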
+  </refsect1>
+
+  <refsect1>
+    <title>Updating endpoints</title>
+    <para>
+      To update an existing endpoint, the
+      <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> command is used on the file
+      descriptor that was used to create the endpoint with
+      <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>. The only relevant detail of
+      the endpoint that can be updated is the policy. When the command is
+      employed, the policy of the endpoint is <emphasis>replaced</emphasis>
+      atomically with the new set of rules.
+      The command takes a <type>struct kdbus_cmd</type> argument.
+    </para>
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Unused for this command.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+          and the <varname>flags</varname> field is set to
+          <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following items are expected for
+            <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant>.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME</constant></term>
+              <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+              <listitem>
+                <para>
+                  These items are used to set the policy attached to the
+                  endpoint. For more details on bus and endpoint policies, see
+                  <citerefentry>
+                    <refentrytitle>kdbus.policy</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>.
+                  Existing policy is atomically replaced with the new rules
+                  provided.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
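+
+    <para>
+      As a rough illustration (not part of the kdbus API documentation
+      itself), the following sketch replaces an endpoint's policy with a
+      single rule that lets any peer talk to one well-known name. The access
+      constants (<constant>KDBUS_POLICY_ACCESS_WORLD</constant>,
+      <constant>KDBUS_POLICY_TALK</constant>) are assumed to be provided by
+      the kdbus uapi header; see
+      <citerefentry>
+        <refentrytitle>kdbus.policy</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for the actual policy semantics. The helper name is hypothetical.
+    </para>
+
+    <programlisting><![CDATA[
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>   /* assumed location of the kdbus uapi header */
+
+#define KDBUS_ALIGN8(val) (((val) + 7) & ~7ULL)
+
+/* ep_fd is the fd that created the endpoint with KDBUS_CMD_ENDPOINT_MAKE */
+static int allow_talk_to(int ep_fd, const char *name)
+{
+    uint64_t name_sz = offsetof(struct kdbus_item, str) + strlen(name) + 1;
+    uint64_t acc_sz = offsetof(struct kdbus_item, policy_access) +
+                      sizeof(struct kdbus_policy_access);
+    uint64_t size = sizeof(struct kdbus_cmd) +
+                    KDBUS_ALIGN8(name_sz) + KDBUS_ALIGN8(acc_sz);
+    struct kdbus_cmd *cmd = calloc(1, size);
+    struct kdbus_item *item;
+    int ret;
+
+    if (!cmd)
+        return -1;
+
+    cmd->size = size;
+
+    /* first item: the name the policy entry applies to */
+    item = cmd->items;
+    item->size = name_sz;
+    item->type = KDBUS_ITEM_NAME;
+    strcpy(item->str, name);
+
+    /* second item: who may do what with that name */
+    item = (struct kdbus_item *)((uint8_t *)item + KDBUS_ALIGN8(name_sz));
+    item->size = acc_sz;
+    item->type = KDBUS_ITEM_POLICY_ACCESS;
+    item->policy_access.type = KDBUS_POLICY_ACCESS_WORLD;
+    item->policy_access.access = KDBUS_POLICY_TALK;
+
+    ret = ioctl(ep_fd, KDBUS_CMD_ENDPOINT_UPDATE, cmd);
+    free(cmd);
+    return ret;
+}
+    ]]></programlisting>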
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> may fail with the
+        following errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The flags supplied in the <type>struct kdbus_cmd</type>
+            are invalid.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal combination of <constant>KDBUS_ITEM_NAME</constant> and
+            <constant>KDBUS_ITEM_POLICY_ACCESS</constant> was provided.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EEXIST</constant></term>
+          <listitem><para>
+            An endpoint of that name already exists.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EPERM</constant></term>
+          <listitem><para>
+            The calling user is not privileged. See
+            <citerefentry>
+              <refentrytitle>kdbus</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for information about privileged users.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> may fail with the
+        following errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The flags supplied in <type>struct kdbus_cmd</type>
+            are invalid.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal combination of <constant>KDBUS_ITEM_NAME</constant> and
+            <constant>KDBUS_ITEM_POLICY_ACCESS</constant> was provided.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.fs.xml linux-4.2-pck/Documentation/kdbus/kdbus.fs.xml
--- linux-4.2/Documentation/kdbus/kdbus.fs.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.fs.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,124 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus_fs">
+
+  <refentryinfo>
+    <title>kdbus.fs</title>
+    <productname>kdbus.fs</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.fs</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.fs</refname>
+    <refpurpose>kdbus file system</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>File-system Layout</title>
+
+    <para>
+      The <emphasis>kdbusfs</emphasis> pseudo filesystem provides access to
+      kdbus entities, such as <emphasis>buses</emphasis> and
+      <emphasis>endpoints</emphasis>. Each time the filesystem is mounted,
+      a new, isolated kdbus instance is created, which is independent of the
+      other instances.
+    </para>
+    <para>
+      The system-wide standard mount point for <emphasis>kdbusfs</emphasis> is
+      <constant>/sys/fs/kdbus</constant>.
+    </para>
+
+    <para>
+      Buses are represented as directories in the file system layout, whereas
+      endpoints are exposed as files inside these directories. At the top level,
+      a <emphasis>control</emphasis> node is present, which can be opened to
+      create new buses via the <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl.
+      Each <emphasis>bus</emphasis> shows a default endpoint called
+      <varname>bus</varname>, which can be opened either to create a connection
+      with the <constant>KDBUS_CMD_HELLO</constant> ioctl, or to create new
+      custom endpoints for the bus with
+      <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>. See
+      <citerefentry>
+        <refentrytitle>kdbus.bus</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>,
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry> and
+      <citerefentry>
+        <refentrytitle>kdbus.endpoint</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more details.
+    </para>
+
+    <para>The following shows an example layout of the
+    <emphasis>kdbusfs</emphasis> filesystem:</para>
+
+<programlisting>
+        /sys/fs/kdbus/                          ; mount-point
+        |-- 0-system                            ; bus directory
+        |   |-- bus                             ; default endpoint
+        |   `-- 1017-custom                     ; custom endpoint
+        |-- 1000-user                           ; bus directory
+        |   |-- bus                             ; default endpoint
+        |   |-- 1000-service-A                  ; custom endpoint
+        |   `-- 1000-service-B                  ; custom endpoint
+        `-- control                             ; control file
+</programlisting>
+  </refsect1>
+
+  <refsect1>
+    <title>Mounting instances</title>
+    <para>
+      In order to get a new and separate kdbus environment, a new instance
+      of <emphasis>kdbusfs</emphasis> can be mounted like this:
+    </para>
+<programlisting>
+  # mount -t kdbusfs kdbusfs /tmp/new_kdbus/
+</programlisting>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>mount</refentrytitle>
+          <manvolnum>8</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.item.xml linux-4.2-pck/Documentation/kdbus/kdbus.item.xml
--- linux-4.2/Documentation/kdbus/kdbus.item.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.item.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,839 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.item">
+
+  <refentryinfo>
+    <title>kdbus.item</title>
+    <productname>kdbus.item</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.item</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.item</refname>
+    <refpurpose>kdbus item structure, layout and usage</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      To flexibly augment transport structures, data blobs of type
+      <type>struct kdbus_item</type> can be attached to the structs passed
+      into the ioctls. Some ioctls require items of certain types, while
+      others accept them optionally. Items that are not supported by the
+      ioctl they are attached to will cause the ioctl to fail with
+      <varname>errno</varname> set to <constant>EINVAL</constant>.
+      Items are also used for information stored in a connection's
+      <emphasis>pool</emphasis>, such as received messages, name lists or
+      requested connection or bus owner information. Depending on the type of
+      an item, its total size is either fixed or variable.
+    </para>
+
+    <refsect2>
+      <title>Chaining items</title>
+      <para>
+        Whenever items are used as part of the kdbus kernel API, they are
+        embedded in structs that themselves include a size field containing
+        the overall size of the structure.
+        This allows multiple items to be chained up, and an item iterator
+        (see below) is capable of detecting the end of an item chain.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Alignment</title>
+      <para>
+        The kernel expects all items to be aligned to 8-byte boundaries.
+        Unaligned items will cause the ioctl they are used with to fail
+        with <varname>errno</varname> set to <constant>EINVAL</constant>.
+        An item that has an unaligned size itself hence needs to be padded
+        if it is followed by another item.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Iterating items</title>
+      <para>
+        A simple iterator walks the chain of items until the accumulated
+        size reaches the embedding structure's overall size. An example
+        implementation is shown below.
+      </para>
+
+      <programlisting><![CDATA[
+#define KDBUS_ALIGN8(val) (((val) + 7) & ~7)
+
+#define KDBUS_ITEM_NEXT(item) \
+    (typeof(item))((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+
+#define KDBUS_ITEM_FOREACH(item, head, first)                      \
+    for ((item) = (head)->first;                                   \
+         ((uint8_t *)(item) < (uint8_t *)(head) + (head)->size) && \
+          ((uint8_t *)(item) >= (uint8_t *)(head));                \
+         (item) = KDBUS_ITEM_NEXT(item))
+      ]]></programlisting>
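+
+      <para>
+        A hypothetical use of this iterator, walking the item chain of a
+        <type>struct kdbus_cmd</type> pointed to by <varname>cmd</varname>,
+        could look like this:
+      </para>
+
+      <programlisting><![CDATA[
+struct kdbus_item *item;
+
+KDBUS_ITEM_FOREACH(item, cmd, items) {
+    if (item->type == KDBUS_ITEM_NEGOTIATE)
+        ;   /* handle this item type */
+}
+      ]]></programlisting>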
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>Item layout</title>
+    <para>
+      A <type>struct kdbus_item</type> consists of a
+      <varname>size</varname> field, describing its overall size, and a
+      <varname>type</varname> field, both 64 bits wide. They are followed by
+      a union to store information that is specific to the item's type.
+      The struct layout is shown below.
+    </para>
+
+    <programlisting>
+struct kdbus_item {
+  __u64 size;
+  __u64 type;
+  /* item payload - see below */
+  union {
+    __u8 data[0];
+    __u32 data32[0];
+    __u64 data64[0];
+    char str[0];
+
+    __u64 id;
+    struct kdbus_vec vec;
+    struct kdbus_creds creds;
+    struct kdbus_pids pids;
+    struct kdbus_audit audit;
+    struct kdbus_caps caps;
+    struct kdbus_timestamp timestamp;
+    struct kdbus_name name;
+    struct kdbus_bloom_parameter bloom_parameter;
+    struct kdbus_bloom_filter bloom_filter;
+    struct kdbus_memfd memfd;
+    int fds[0];
+    struct kdbus_notify_name_change name_change;
+    struct kdbus_notify_id_change id_change;
+    struct kdbus_policy_access policy_access;
+  };
+};
+    </programlisting>
+
+    <para>
+      <type>struct kdbus_item</type> should never be used to allocate
+      an item instance, as its size may grow in future releases of the API.
+      Instead, it should be manually assembled by storing the
+      <varname>size</varname>, <varname>type</varname> and payload in a
+      struct of its own.
+    </para>
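+
+    <para>
+      A minimal sketch of such a caller-defined struct is shown below. The
+      struct name and the fixed payload size are hypothetical; only the
+      <varname>size</varname>/<varname>type</varname> header layout and the
+      8-byte alignment follow the rules described in this document.
+    </para>
+
+    <programlisting><![CDATA[
+#include <stdint.h>
+#include <string.h>
+#include <linux/kdbus.h>   /* assumed location of the kdbus uapi header */
+
+/* carries one KDBUS_ITEM_MAKE_NAME item with an inline payload buffer */
+struct my_name_item {
+    uint64_t size;     /* overall item size, including the payload */
+    uint64_t type;     /* KDBUS_ITEM_MAKE_NAME */
+    char str[64];      /* null-terminated name payload */
+} __attribute__((aligned(8)));
+
+static void fill_name_item(struct my_name_item *it, const char *name)
+{
+    it->type = KDBUS_ITEM_MAKE_NAME;
+    strncpy(it->str, name, sizeof(it->str) - 1);
+    it->str[sizeof(it->str) - 1] = '\0';
+    /* size covers the two header fields plus the used string bytes */
+    it->size = 2 * sizeof(uint64_t) + strlen(it->str) + 1;
+}
+    ]]></programlisting>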
+  </refsect1>
+
+  <refsect1>
+    <title>Item types</title>
+
+    <refsect2>
+      <title>Negotiation item</title>
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+          <listitem><para>
+            When this item is attached to any ioctl, programs can
+            <emphasis>probe</emphasis> the kernel for known item types.
+            The item carries an array of <type>uint64_t</type> values in
+            <varname>item.data64</varname>, each set to an item type to
+            probe. The kernel will reset each member of this array that is
+            not recognized as a valid item type to <constant>0</constant>.
+            This way, users can negotiate kernel features at start-up to
+            keep newer userspace compatible with older kernels. This item
+            is never attached by the kernel in response to any command.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
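+
+      <para>
+        As a hypothetical illustration, a
+        <constant>KDBUS_ITEM_NEGOTIATE</constant> item probing for two of the
+        item types described in this document could be filled in as follows
+        (the helper name is made up):
+      </para>
+
+      <programlisting><![CDATA[
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <linux/kdbus.h>   /* assumed location of the kdbus uapi header */
+
+/* 'item' must point into a command buffer with room for the payload */
+static void fill_negotiate(struct kdbus_item *item)
+{
+    const uint64_t probe[] = { KDBUS_ITEM_BLOOM_MASK, KDBUS_ITEM_CANCEL_FD };
+
+    item->type = KDBUS_ITEM_NEGOTIATE;
+    item->size = offsetof(struct kdbus_item, data64) + sizeof(probe);
+    memcpy(item->data64, probe, sizeof(probe));
+
+    /*
+     * After the ioctl returns, any entry the kernel did not recognize
+     * as a valid item type has been reset to 0.
+     */
+}
+      ]]></programlisting>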
+    </refsect2>
+
+    <refsect2>
+      <title>Command specific items</title>
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+          <term><constant>KDBUS_ITEM_PAYLOAD_OFF</constant></term>
+          <listitem><para>
+            Messages are directly copied by the sending process into the
+            receiver's
+            <citerefentry>
+              <refentrytitle>kdbus.pool</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+            This way, two peers can exchange data by effectively doing a
+            single-copy from one process to another; the kernel will not buffer
+            the data anywhere else. <constant>KDBUS_ITEM_PAYLOAD_VEC</constant>
+            is used when <emphasis>sending</emphasis> messages. The item
+            references a memory address where the payload data can be found.
+            <constant>KDBUS_ITEM_PAYLOAD_OFF</constant> is used when messages
+            are <emphasis>received</emphasis>, and the
+            <constant>offset</constant> value describes the offset inside the
+            receiving connection's
+            <citerefentry>
+              <refentrytitle>kdbus.pool</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            where the message payload can be found. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on passing of payload data along with a
+            message.
+            <programlisting>
+struct kdbus_vec {
+  __u64 size;
+  union {
+    __u64 address;
+    __u64 offset;
+  };
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+          <listitem><para>
+            Transports a file descriptor of a <emphasis>memfd</emphasis> in
+            <type>struct kdbus_memfd</type> in <varname>item.memfd</varname>.
+            The <varname>size</varname> field has to match the actual size of
+            the memfd that was specified when it was created. The
+            <varname>start</varname> parameter denotes the offset inside the
+            memfd at which the referenced payload starts. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on passing of payload data along with a
+            message.
+            <programlisting>
+struct kdbus_memfd {
+  __u64 start;
+  __u64 size;
+  int fd;
+  __u32 __pad;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_FDS</constant></term>
+          <listitem><para>
+            Contains an array of <emphasis>file descriptors</emphasis>.
+            When used with <constant>KDBUS_CMD_SEND</constant>, the values of
+            this array must be filled with valid file descriptor numbers.
+            When received as item attached to a message, the array will
+            contain the numbers of the installed file descriptors, or
+            <constant>-1</constant> in case an error occurred.
+            In either case, the number of entries in the array is derived from
+            the item's total size. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>Items specific to some commands</title>
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CANCEL_FD</constant></term>
+          <listitem><para>
+            Transports a file descriptor that can be used to cancel a
+            synchronous <constant>KDBUS_CMD_SEND</constant> operation by
+            writing to it. The file descriptor is stored in
+            <varname>item.fd[0]</varname>. The item may only contain one
+            file descriptor. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on this item and how to use it.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+          <listitem><para>
+            Contains a set of <emphasis>bloom parameters</emphasis> as
+            <type>struct kdbus_bloom_parameter</type> in
+            <varname>item.bloom_parameter</varname>.
+            The item is passed from userspace to kernel during the
+            <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl, and returned
+            verbatim when <constant>KDBUS_CMD_HELLO</constant> is called.
+            The kernel does not use the bloom parameters, but they need to
+            be known by each connection on the bus in order to define the
+            bloom filter hash details. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on matching and bloom filters.
+            <programlisting>
+struct kdbus_bloom_parameter {
+  __u64 size;
+  __u64 n_hash;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_BLOOM_FILTER</constant></term>
+          <listitem><para>
+            Carries a <emphasis>bloom filter</emphasis> as
+            <type>struct kdbus_bloom_filter</type> in
+            <varname>item.bloom_filter</varname>. It is mandatory to send this
+            item attached to a <type>struct kdbus_msg</type> if the
+            message is a signal. This item is never transported from kernel to
+            userspace. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on matching and bloom filters.
+            <programlisting>
+struct kdbus_bloom_filter {
+  __u64 generation;
+  __u64 data[0];
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_BLOOM_MASK</constant></term>
+          <listitem><para>
+            Transports a <emphasis>bloom mask</emphasis> as binary data blob
+            stored in <varname>item.data</varname>. This item is used to
+            describe a match entry in a connection's match database. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on matching and bloom filters.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_DST_NAME</constant></term>
+          <listitem><para>
+            Contains a <emphasis>well-known name</emphasis> to send a
+            message to, as null-terminated string in
+            <varname>item.str</varname>. This item is used with
+            <constant>KDBUS_CMD_SEND</constant>. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on how to send a message.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+          <listitem><para>
+            Contains a <emphasis>bus name</emphasis> or
+            <emphasis>endpoint name</emphasis>, stored as null-terminated
+            string in <varname>item.str</varname>. This item is sent from
+            userspace to kernel when buses or endpoints are created, and
+            returned to userspace when the bus creator information is
+            queried. See
+            <citerefentry>
+              <refentrytitle>kdbus.bus</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            and
+            <citerefentry>
+              <refentrytitle>kdbus.endpoint</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+          <term><constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant></term>
+          <listitem><para>
+            Contains a set of <emphasis>attach flags</emphasis> at
+            <emphasis>send</emphasis> or <emphasis>receive</emphasis> time. See
+            <citerefentry>
+              <refentrytitle>kdbus</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>,
+            <citerefentry>
+              <refentrytitle>kdbus.bus</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry> and
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on attach flags.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_ID</constant></term>
+          <listitem><para>
+            Transports the <emphasis>numerical ID</emphasis> of
+            a connection as a <type>uint64_t</type> value in
+            <varname>item.id</varname>.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_NAME</constant></term>
+          <listitem><para>
+            Transports a name associated with the
+            <emphasis>name registry</emphasis>, stored as a null-terminated
+            string in a <type>struct kdbus_name</type> in
+            <varname>item.name</varname>. The <varname>flags</varname> field
+            contains the flags of the name. See
+            <citerefentry>
+              <refentrytitle>kdbus.name</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on how to access the name registry of a bus.
+            <programlisting>
+struct kdbus_name {
+  __u64 flags;
+  char name[0];
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>Items attached by the kernel as metadata</title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_TIMESTAMP</constant></term>
+          <listitem><para>
+            Contains both the <emphasis>monotonic</emphasis> and the
+            <emphasis>realtime</emphasis> timestamp, taken when the message
+            was processed on the kernel side.
+            Stored as <type>struct kdbus_timestamp</type> in
+            <varname>item.timestamp</varname>.
+            <programlisting>
+struct kdbus_timestamp {
+  __u64 seqnum;
+  __u64 monotonic_ns;
+  __u64 realtime_ns;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CREDS</constant></term>
+          <listitem><para>
+            Contains a set of <emphasis>user</emphasis> and
+            <emphasis>group</emphasis> information as 32-bit values, in the
+            usual four flavors: real, effective, saved and filesystem related.
+            Stored as <type>struct kdbus_creds</type> in
+            <varname>item.creds</varname>.
+            <programlisting>
+struct kdbus_creds {
+  __u32 uid;
+  __u32 euid;
+  __u32 suid;
+  __u32 fsuid;
+  __u32 gid;
+  __u32 egid;
+  __u32 sgid;
+  __u32 fsgid;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_PIDS</constant></term>
+          <listitem><para>
+            Contains the <emphasis>PID</emphasis>, <emphasis>TID</emphasis>
+            and <emphasis>parent PID (PPID)</emphasis> of a remote peer.
+            Stored as <type>struct kdbus_pids</type> in
+            <varname>item.pids</varname>.
+            <programlisting>
+struct kdbus_pids {
+  __u64 pid;
+  __u64 tid;
+  __u64 ppid;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_AUXGROUPS</constant></term>
+          <listitem><para>
+            Contains the <emphasis>auxiliary (supplementary) groups</emphasis>
+            a remote peer is a member of, stored as array of
+            <type>uint32_t</type> values in <varname>item.data32</varname>.
+            The array length can be determined by taking the item's total
+            size, subtracting the size of the header and dividing the
+            remainder by <constant>sizeof(uint32_t)</constant>; see the
+            sketch at the end of this section.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_OWNED_NAME</constant></term>
+          <listitem><para>
+            Contains a <emphasis>well-known name</emphasis> currently owned
+            by a connection. The name is stored as null-terminated string in
+            <varname>item.str</varname>. Its length can also be derived from
+            the item's total size.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_TID_COMM</constant> [*]</term>
+          <listitem><para>
+            Contains the <emphasis>comm</emphasis> string of a task's
+            <emphasis>TID</emphasis> (thread ID), stored as null-terminated
+            string in <varname>item.str</varname>. Its length can also be
+            derived from the item's total size. Receivers of this item should
+            not use its contents for any kind of security measures. See below.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_PID_COMM</constant> [*]</term>
+          <listitem><para>
+            Contains the <emphasis>comm</emphasis> string of a task's
+            <emphasis>PID</emphasis> (process ID), stored as null-terminated
+            string in <varname>item.str</varname>. Its length can also be
+            derived from the item's total size. Receivers of this item should
+            not use its contents for any kind of security measures. See below.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_EXE</constant> [*]</term>
+          <listitem><para>
+            Contains the <emphasis>path to the executable</emphasis> of a task,
+            stored as null-terminated string in <varname>item.str</varname>. Its
+            length can also be derived from the item's total size. Receivers of
+            this item should not use its contents for any kind of security
+            measures. See below.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CMDLINE</constant> [*]</term>
+          <listitem><para>
+            Contains the <emphasis>command line arguments</emphasis> of a
+            task, stored as an <emphasis>array</emphasis> of null-terminated
+            strings in <varname>item.str</varname>. The total length of all
+            strings in the array can be derived from the item's total size.
+            Receivers of this item should not use its contents for any kind
+            of security measures. See below.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CGROUP</constant></term>
+          <listitem><para>
+            Contains the <emphasis>cgroup path</emphasis> of a task, stored
+            as null-terminated string in <varname>item.str</varname>. Its
+            length can also be derived from the item's total size.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CAPS</constant></term>
+          <listitem><para>
+            Contains sets of <emphasis>capabilities</emphasis>, stored as
+            <type>struct kdbus_caps</type> in <varname>item.caps</varname>.
+            As the item size may increase in the future, programs should be
+            written in a way that takes
+            <varname>item.caps.last_cap</varname> into account, and derives
+            the number of sets and rows from the item size and the reported
+            number of valid capability bits.
+            <programlisting>
+struct kdbus_caps {
+  __u32 last_cap;
+  __u32 caps[0];
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_SECLABEL</constant></term>
+          <listitem><para>
+            Contains the <emphasis>LSM label</emphasis> of a task, stored as
+            null-terminated string in <varname>item.str</varname>. Its length
+            can also be derived from the item's total size.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_AUDIT</constant></term>
+          <listitem><para>
+            Contains the audit <emphasis>sessionid</emphasis> and
+            <emphasis>loginuid</emphasis> of a task, stored as
+            <type>struct kdbus_audit</type> in
+            <varname>item.audit</varname>.
+            <programlisting>
+struct kdbus_audit {
+  __u32 sessionid;
+  __u32 loginuid;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_CONN_DESCRIPTION</constant></term>
+          <listitem><para>
+            Contains the <emphasis>connection description</emphasis>, as set
+            by <constant>KDBUS_CMD_HELLO</constant> or
+            <constant>KDBUS_CMD_CONN_UPDATE</constant>, stored as
+            null-terminated string in <varname>item.str</varname>. Its length
+            can also be derived from the item's total size.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+
+      <para>
+        All metadata is automatically translated into the
+        <emphasis>namespaces</emphasis> of the task that receives it. See
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more information.
+      </para>
+
+      <para>
+        [*] Note that the content stored in metadata items of type
+        <constant>KDBUS_ITEM_TID_COMM</constant>,
+        <constant>KDBUS_ITEM_PID_COMM</constant>,
+        <constant>KDBUS_ITEM_EXE</constant> and
+        <constant>KDBUS_ITEM_CMDLINE</constant>
+        can easily be tampered with by the sending tasks. Therefore, they should
+        <emphasis>not</emphasis> be used for any sort of security-relevant
+        assumptions. The only reason they are transmitted is to let
+        receivers know about details that were set when metadata was
+        collected, even though the task they were collected from is not
+        active any longer when the items are received.
+      </para>
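+
+      <para>
+        As a small illustration of the length calculation described for
+        <constant>KDBUS_ITEM_AUXGROUPS</constant> above (the variable names
+        are hypothetical):
+      </para>
+
+      <programlisting><![CDATA[
+/* 'item' points at a received KDBUS_ITEM_AUXGROUPS item */
+size_t n_groups = (item->size - offsetof(struct kdbus_item, data32)) /
+                  sizeof(uint32_t);
+/* item->data32[0] .. item->data32[n_groups - 1] hold the group IDs */
+      ]]></programlisting>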
+    </refsect2>
+
+    <refsect2>
+      <title>Items used for policy entries, matches and notifications</title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+          <listitem><para>
+            This item describes a <emphasis>policy access</emphasis> entry
+            for the policy database of a
+            <citerefentry>
+              <refentrytitle>kdbus.bus</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry> or
+            <citerefentry>
+              <refentrytitle>kdbus.endpoint</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+            Please refer to
+            <citerefentry>
+              <refentrytitle>kdbus.policy</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on the policy database and how to access it.
+            <programlisting>
+struct kdbus_policy_access {
+  __u64 type;
+  __u64 access;
+  __u64 id;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_ID_ADD</constant></term>
+          <term><constant>KDBUS_ITEM_ID_REMOVE</constant></term>
+          <listitem><para>
+            These items are sent attached to
+            <emphasis>kernel notifications</emphasis> and indicate that a
+            new connection was created on the bus or that a connection was
+            disconnected, respectively. Each stores a
+            <type>struct kdbus_notify_id_change</type> in
+            <varname>item.id_change</varname>.
+            The <varname>id</varname> field contains the numeric ID of the
+            connection that was added or removed, and <varname>flags</varname>
+            is set to the connection flags, as passed by
+            <constant>KDBUS_CMD_HELLO</constant>. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            and
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on matches and notification messages.
+            <programlisting>
+struct kdbus_notify_id_change {
+  __u64 id;
+  __u64 flags;
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_NAME_ADD</constant></term>
+          <term><constant>KDBUS_ITEM_NAME_REMOVE</constant></term>
+          <term><constant>KDBUS_ITEM_NAME_CHANGE</constant></term>
+          <listitem><para>
+            These items are sent attached to
+            <emphasis>kernel notifications</emphasis> and indicate that a
+            <emphasis>well-known name</emphasis> appeared, disappeared or was
+            transferred to another owner on the bus, respectively. Each stores a
+            <type>struct kdbus_notify_name_change</type> in
+            <varname>item.name_change</varname>.
+            <varname>old_id</varname> describes the former owner of the name;
+            all of its fields are set to <constant>0</constant> in case of
+            <constant>KDBUS_ITEM_NAME_ADD</constant>.
+            <varname>new_id</varname> describes the new owner of the name;
+            all of its fields are set to <constant>0</constant> in case of
+            <constant>KDBUS_ITEM_NAME_REMOVE</constant>.
+            The <varname>name</varname> field contains the well-known name the
+            notification is about, as null-terminated string. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            and
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information on matches and notification messages.
+            <programlisting>
+struct kdbus_notify_name_change {
+  struct kdbus_notify_id_change old_id;
+  struct kdbus_notify_id_change new_id;
+  char name[0];
+};
+            </programlisting>
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_REPLY_TIMEOUT</constant></term>
+          <listitem><para>
+            This item is sent as an attachment to a
+            <emphasis>kernel notification</emphasis>. It informs the receiver
+            that an expected reply to a message was not received in time.
+            The remote peer ID and the message cookie are stored in the message
+            header. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information about messages, timeouts and notifications.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ITEM_REPLY_DEAD</constant></term>
+          <listitem><para>
+            This item is sent as an attachment to a
+            <emphasis>kernel notification</emphasis>. It informs the receiver
+            that a remote connection from which a reply was expected was
+            disconnected before that reply was sent. The remote peer ID and
+            the message cookie are stored in the message header. See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for more information about messages, timeouts and notifications.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>memfd_create</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.match.xml linux-4.2-pck/Documentation/kdbus/kdbus.match.xml
--- linux-4.2/Documentation/kdbus/kdbus.match.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.match.xml	2015-09-08 00:48:22.208364829 -0300
@@ -0,0 +1,555 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.match">
+
+  <refentryinfo>
+    <title>kdbus.match</title>
+    <productname>kdbus.match</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.match</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.match</refname>
+    <refpurpose>kdbus match</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      kdbus connections can install matches in order to subscribe to signal
+      messages sent on the bus. Such signal messages can be either directed
+      to a single connection (by setting a specific connection ID in
+      <varname>struct kdbus_msg.dst_id</varname> or by sending it to a
+      well-known name), or to potentially <emphasis>all</emphasis> currently
+      active connections on the bus (by setting
+      <varname>struct kdbus_msg.dst_id</varname> to
+      <constant>KDBUS_DST_ID_BROADCAST</constant>).
+      A signal message always has the <constant>KDBUS_MSG_SIGNAL</constant>
+      bit set in the <varname>flags</varname> bitfield.
+      Also, signal messages can originate from either the kernel (called
+      <emphasis>notifications</emphasis>), or from other bus connections.
+      In either case, a bus connection needs to have a suitable
+      <emphasis>match</emphasis> installed in order to receive any signal
+      message. Without any rules installed in the connection, no signal message
+      will be received.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Matches for signal messages from other connections</title>
+    <para>
+      Matches for messages from other connections (not kernel notifications)
+      are implemented as bloom filters (see below). The sender adds certain
+      properties of the message as elements to a bloom filter bit field, and
+      sends that along with the signal message.
+
+      The receiving connection adds the message properties it is interested in
+      as elements to a bloom mask bit field, and uploads the mask as match rule,
+      possibly along with some other rules to further limit the match.
+
+      The kernel will match the signal message's bloom filter against the
+      connection's bloom mask (simply by &amp;-ing it), and will decide whether
+      the message should be delivered to a connection.
+    </para>
+    <para>
+      The kernel has no notion of any specific properties of the signal message,
+      all it sees are the bit fields of the bloom filter and the mask to match
+      against. The use of bloom filters allows simple and efficient matching,
+      without exposing any message properties or internals to the kernel side.
+      Clients need to deal with the fact that they might receive signal messages
+      which they did not subscribe to, as the bloom filter might allow
+      false-positives to pass the filter.
+
+      To allow the future extension of the set of elements in the bloom filter,
+      the filter specifies a <emphasis>generation</emphasis> number. A later
+      generation must always contain all elements of the set of the previous
+      generation, but can add new elements to the set. The match rules mask can
+      carry an array with all previous generations of masks individually stored.
+      When the kernel matches the filter against the mask, the generation of
+      the filter selects the closest matching entry in the mask array.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Bloom filters</title>
+    <para>
+      Bloom filters allow checking whether a given word is present in a
+      dictionary. This allows a connection to set up a mask for the
+      information it is interested in; signal messages carrying a matching
+      filter will then be delivered to that connection.
+
+      For general information, see
+      <ulink url="https://en.wikipedia.org/wiki/Bloom_filter">the Wikipedia
+      article on bloom filters</ulink>.
+    </para>
+    <para>
+      The size of the bloom filter is defined per bus when it is created, in
+      <varname>kdbus_bloom_parameter.size</varname>. All bloom filters attached
+      to signal messages on the bus must match this size, and all bloom filter
+      matches uploaded by connections must also match the size, or a multiple
+      thereof (see below).
+
+      The calculation of the mask has to be done in userspace applications. The
+      kernel just checks the bitmasks to decide whether or not to let the
+      message pass. All bits set in the filter must also be set in the mask
+      (checked with bit-wise <emphasis>AND</emphasis> logic), but the mask may
+      have more bits set than the filter. Consequently, false positive matches
+      are expected to happen, and programs must deal with that fact by checking
+      the contents of the payload again at receive time.
+    </para>
+    <para>
+      Masks are entities that are always passed to the kernel as part of a
+      match (with an item of type <constant>KDBUS_ITEM_BLOOM_MASK</constant>),
+      and filters can be attached to signals, with an item of type
+      <constant>KDBUS_ITEM_BLOOM_FILTER</constant>. For a filter to match, all
+      its bits have to be set in the match mask as well.
+    </para>
+    <para>
+      For example, consider a bus that has a bloom size of 8 bytes, and the
+      following mask/filter combinations:
+    </para>
+    <programlisting><![CDATA[
+          filter  0x0101010101010101
+          mask    0x0101010101010101
+                  -> matches
+
+          filter  0x0303030303030303
+          mask    0x0101010101010101
+                  -> doesn't match
+
+          filter  0x0101010101010101
+          mask    0x0303030303030303
+                  -> matches
+    ]]></programlisting>
+
+    <para>
+      Hence, in order to catch all messages, a mask filled with
+      <constant>0xff</constant> bytes can be installed as a wildcard match rule.
+    </para>
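+
+    <para>
+      The check described above can be illustrated with the following
+      non-normative sketch. It is not part of the kdbus API; it merely
+      restates the rule in C, assuming the bus' bloom size is a multiple of
+      8 bytes so that filter and mask can be walked as 64-bit words.
+    </para>
+
+    <programlisting><![CDATA[
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* A filter passes a mask if and only if every bit set in the filter is
+ * also set in the mask; the mask may have additional bits set. */
+static bool bloom_match(const uint64_t *filter, const uint64_t *mask,
+                        size_t n_words)
+{
+        size_t i;
+
+        for (i = 0; i < n_words; i++)
+                if ((filter[i] & mask[i]) != filter[i])
+                        return false;   /* the filter carries a bit the mask lacks */
+
+        return true;
+}
+    ]]></programlisting>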
+
+    <refsect2>
+      <title>Generations</title>
+
+      <para>
+        Uploaded matches may contain multiple masks, which have to be as large
+        as the bloom filter size defined by the bus. Each block of a mask is
+        called a <emphasis>generation</emphasis>, starting at index 0.
+
+        At match time, when a signal is about to be delivered, a bloom mask
+        generation is passed, which denotes which of the bloom masks the filter
+        should be matched against. This allows programs to provide backward
+        compatible masks at upload time, while older clients can still match
+        against older versions of filters.
+      </para>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>Matches for kernel notifications</title>
+    <para>
+      To receive kernel generated notifications (see
+      <citerefentry>
+        <refentrytitle>kdbus.message</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>),
+      a connection must install match rules that are different from
+      the bloom filter matches described in the section above. They can be
+      filtered by the connection ID that caused the notification to be sent, by
+      one of the names it currently owns, or by the type of the notification
+      (ID/name add/remove/change).
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Adding a match</title>
+    <para>
+      To add a match, the <constant>KDBUS_CMD_MATCH_ADD</constant> ioctl is
+      used, which takes a <type>struct kdbus_cmd_match</type> as an argument
+      described below.
+
+      Note that each of the items attached to this command will internally
+      create one match <emphasis>rule</emphasis>, and the collection of them,
+      which is submitted as one block via the ioctl, is called a
+      <emphasis>match</emphasis>. To allow a message to pass, all rules of a
+      match have to be satisfied. Hence, adding more items to the command will
+      only narrow down which messages effectively pass, and will decrease the
+      chance that the connection's process is woken up needlessly.
+
+      Multiple matches can be installed per connection. A message is delivered
+      as long as at least one of them has a set of rules that allows it to
+      pass.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_match {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __u64 cookie;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>Flags to control the behavior of the ioctl.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_MATCH_REPLACE</constant></term>
+              <listitem>
+                <para>
+                  Remove any existing matches that were installed with the
+                  same cookie before adding the new one.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>cookie</varname></term>
+        <listitem><para>
+          A cookie which identifies the match, so it can be referred to when
+          removing it.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+        <para>
+          Items to define the actual rules of the matches. The following item
+          types are expected. Each item will create one new match rule.
+        </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_BLOOM_MASK</constant></term>
+              <listitem>
+                <para>
+                  An item that carries the bloom filter mask to match against
+                  in its data field. The payload size must match the bloom
+                  filter size that was specified when the bus was created.
+                  See the "Bloom filters" section above for more information on
+                  bloom filters.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME</constant></term>
+              <listitem>
+                <para>
+                  When used as part of kernel notifications, this item specifies
+                  a name that was acquired, lost, or whose owner changed (see
+                  below). When used as part of a match for user-generated signal
+                  messages, it specifies a name that the sending connection must
+                  own at the time of sending the signal.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_ID</constant></term>
+              <listitem>
+                <para>
+                  Specify a sender connection's ID that will match this rule.
+                  For kernel notifications, this specifies the ID of a
+                  connection that was added to or removed from the bus.
+                  For user-generated signals, it specifies the ID of the
+                  connection that sent the signal message.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NAME_ADD</constant></term>
+              <term><constant>KDBUS_ITEM_NAME_REMOVE</constant></term>
+              <term><constant>KDBUS_ITEM_NAME_CHANGE</constant></term>
+              <listitem>
+                <para>
+                  These items request delivery of kernel notifications that
+                  describe a name acquisition, loss, or change. The details
+                  are stored in the item's
+                  <varname>kdbus_notify_name_change</varname> member.
+                  All information specified must be matched in order to make
+                  the message pass. Use
+                  <constant>KDBUS_MATCH_ID_ANY</constant> to
+                  match against any unique connection ID.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_ID_ADD</constant></term>
+              <term><constant>KDBUS_ITEM_ID_REMOVE</constant></term>
+              <listitem>
+                <para>
+                  These items request delivery of kernel notifications that are
+                  generated when a connection is created or terminated.
+                  <type>struct kdbus_notify_id_change</type> is used to
+                  store the actual match information. This item can be used to
+                  monitor one particular connection ID, or, when the ID field
+                  is set to <constant>KDBUS_MATCH_ID_ANY</constant>,
+                  all of them.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+              <listitem><para>
+                With this item, programs can <emphasis>probe</emphasis> the
+                kernel for known item types. See
+                <citerefentry>
+                  <refentrytitle>kdbus.item</refentrytitle>
+                  <manvolnum>7</manvolnum>
+                </citerefentry>
+                for more details.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Refer to
+      <citerefentry>
+        <refentrytitle>kdbus.message</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information on message types.
+    </para>
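+
+    <para>
+      The following non-normative sketch shows how such a command could be
+      assembled in userspace. It assumes a connected endpoint file descriptor
+      <varname>conn_fd</varname>, a bus bloom size of 8 bytes (as in the
+      example in the "Bloom filters" section above), and omits error handling.
+      It installs a wildcard bloom mask so that all user-generated signal
+      messages pass.
+    </para>
+
+    <programlisting><![CDATA[
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+#define BLOOM_SIZE 8    /* must equal the bus' kdbus_bloom_parameter.size */
+
+int install_wildcard_match(int conn_fd)
+{
+        struct {
+                struct kdbus_cmd_match cmd;
+                struct {
+                        uint64_t size;  /* item header: size and type */
+                        uint64_t type;
+                        uint8_t mask[BLOOM_SIZE];
+                } item;
+        } m;
+
+        memset(&m, 0, sizeof(m));
+        m.cmd.size = sizeof(m);
+        m.cmd.cookie = 1;                       /* chosen by the caller */
+
+        m.item.size = sizeof(m.item);
+        m.item.type = KDBUS_ITEM_BLOOM_MASK;
+        memset(m.item.mask, 0xff, BLOOM_SIZE);  /* all bits set: wildcard */
+
+        return ioctl(conn_fd, KDBUS_CMD_MATCH_ADD, &m);
+}
+    ]]></programlisting>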
+  </refsect1>
+
+  <refsect1>
+    <title>Removing a match</title>
+    <para>
+      Matches can be removed with the
+      <constant>KDBUS_CMD_MATCH_REMOVE</constant> ioctl, which takes
+      <type>struct kdbus_cmd_match</type> as an argument, but the usage of its
+      fields differs slightly from that of
+      <constant>KDBUS_CMD_MATCH_ADD</constant>.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_match {
+  __u64 size;
+  __u64 cookie;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>cookie</varname></term>
+        <listitem><para>
+          The cookie of the match, as it was passed when the match was added.
+          All matches that have this cookie will be removed.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          No flags are supported for this use case.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will fail with
+          <errorcode>-1</errorcode>, <varname>errno</varname> will be set to
+          <constant>EPROTO</constant>, and the <varname>flags</varname> field
+          will be set to <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            No items are supported for this use case, but
+            <constant>KDBUS_ITEM_NEGOTIATE</constant> is allowed nevertheless.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
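+
+    <para>
+      A minimal, non-normative sketch of removing a match, under the same
+      assumptions as the example in the previous section, could look like
+      this. All matches previously installed with cookie
+      <constant>1</constant> are removed.
+    </para>
+
+    <programlisting><![CDATA[
+struct kdbus_cmd_match cmd = {
+        .size = sizeof(cmd),
+        .cookie = 1,    /* same cookie as passed to KDBUS_CMD_MATCH_ADD */
+};
+
+/* Fails with errno set to EBADSLT if no match with that cookie exists. */
+ioctl(conn_fd, KDBUS_CMD_MATCH_REMOVE, &cmd);
+    ]]></programlisting>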
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_MATCH_ADD</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal flags or items.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EDOM</constant></term>
+          <listitem><para>
+            Illegal bloom filter size.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMFILE</constant></term>
+          <listitem><para>
+            Too many matches for this connection.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_MATCH_REMOVE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal flags.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EBADSLT</constant></term>
+          <listitem><para>
+            A match entry with the given cookie could not be found.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.match</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.message.xml linux-4.2-pck/Documentation/kdbus/kdbus.message.xml
--- linux-4.2/Documentation/kdbus/kdbus.message.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.message.xml	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,1276 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.message">
+
+  <refentryinfo>
+    <title>kdbus.message</title>
+    <productname>kdbus.message</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.message</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.message</refname>
+    <refpurpose>kdbus message</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      A kdbus message is used to exchange information between two connections
+      on a bus, or to transport notifications from the kernel to one or many
+      connections. This document describes the layout of messages, how payload
+      is added to them and how they are sent and received.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Message layout</title>
+
+    <para>The layout of a message is shown below.</para>
+
+    <programlisting>
+  +-------------------------------------------------------------------------+
+  | Message                                                                 |
+  | +---------------------------------------------------------------------+ |
+  | | Header                                                              | |
+  | | size:          overall message size, including the data records     | |
+  | | destination:   connection ID of the receiver                        | |
+  | | source:        connection ID of the sender (set by kernel)          | |
+  | | payload_type:  "DBusDBus" textual identifier stored as uint64_t     | |
+  | +---------------------------------------------------------------------+ |
+  | +---------------------------------------------------------------------+ |
+  | | Data Record                                                         | |
+  | | size:  overall record size (without padding)                        | |
+  | | type:  type of data                                                 | |
+  | | data:  reference to data (address or file descriptor)               | |
+  | +---------------------------------------------------------------------+ |
+  | +---------------------------------------------------------------------+ |
+  | | padding bytes to the next 8 byte alignment                          | |
+  | +---------------------------------------------------------------------+ |
+  | +---------------------------------------------------------------------+ |
+  | | Data Record                                                         | |
+  | | size:  overall record size (without padding)                        | |
+  | | ...                                                                 | |
+  | +---------------------------------------------------------------------+ |
+  | +---------------------------------------------------------------------+ |
+  | | padding bytes to the next 8 byte alignment                          | |
+  | +---------------------------------------------------------------------+ |
+  | +---------------------------------------------------------------------+ |
+  | | Data Record                                                         | |
+  | | size:  overall record size                                          | |
+  | | ...                                                                 | |
+  | +---------------------------------------------------------------------+ |
+  |   ... further data records ...                                          |
+  +-------------------------------------------------------------------------+
+    </programlisting>
+  </refsect1>
+
+  <refsect1>
+    <title>Message payload</title>
+
+    <para>
+      When connecting to the bus, receivers request a memory pool of a given
+      size, large enough to carry the entire backlog of data enqueued for the
+      connection. The pool is internally backed by a shared memory file which
+      can be <function>mmap()</function>ed by the receiver. See
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information.
+    </para>
+
+    <para>
+      Message payload must be described in items attached to a message when
+      it is sent. A receiver can access the payload by looking at the items
+      that are attached to a message in its pool. The following items are used.
+    </para>
+
+    <variablelist>
+      <varlistentry>
+        <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+        <listitem>
+          <para>
+            This item references a piece of memory on the sender side which is
+            directly copied into the receiver's pool. This way, two peers can
+            exchange data by effectively doing a single-copy from one process
+            to another; the kernel will not buffer the data anywhere else.
+            This item is never found in a message received by a connection.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><constant>KDBUS_ITEM_PAYLOAD_OFF</constant></term>
+        <listitem>
+          <para>
+            This item is attached to messages on the receiving side and points
+            to a memory area inside the receiver's pool. The
+            <varname>offset</varname> variable in the item denotes the memory
+            location relative to the message itself.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+        <listitem>
+          <para>
+            Messages can reference <emphasis>memfd</emphasis> files which
+            contain the data. memfd files are tmpfs-backed files that allow
+            sealing of the content of the file, which prevents all writable
+            access to the file content.
+          </para>
+          <para>
+            Only memfds that have
+            <constant>(F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL)
+            </constant>
+            set are accepted as payload data, which enforces reliable passing of
+            data. The receiver can assume that neither the sender nor anyone
+            else can alter the content after the message is sent. If those
+            seals are not set on the memfd, the ioctl will fail with
+            <errorcode>-1</errorcode>, and <varname>errno</varname> will be
+            set to <constant>ETXTBUSY</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><constant>KDBUS_ITEM_FDS</constant></term>
+        <listitem>
+          <para>
+            Messages can transport regular file descriptors via
+            <constant>KDBUS_ITEM_FDS</constant>. This item carries an array
+            of <type>int</type> values in <varname>item.fd</varname>. The
+            maximum number of file descriptors in the item is
+            <constant>253</constant>, and only one item of this type is
+            accepted per message. All passed values must be valid file
+            descriptors; the open count of each file descriptor is increased
+            by installing it into the receiver's task. This item can only be
+            used for directed messages, not for broadcasts, and only to
+            remote peers that have opted in to receiving file descriptors
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The sender must not make any assumptions on the type in which data is
+      received by the remote peer. The kernel is free to re-pack multiple
+      <constant>KDBUS_ITEM_PAYLOAD_VEC</constant> and
+      <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant> payloads. For instance, the
+      kernel may decide to merge multiple <constant>VECs</constant> into a
+      single <constant>VEC</constant>, inline <constant>MEMFD</constant>
+      payloads into memory, or merge all passed <constant>VECs</constant> into a
+      single <constant>MEMFD</constant>. However, the kernel preserves the order
+      of passed data. This means that the order of all <constant>VEC</constant>
+      and <constant>MEMFD</constant> items is not changed with respect to each
+      other. In other words: All passed <constant>VEC</constant> and
+      <constant>MEMFD</constant> data payloads are treated as a single stream
+      of data that may be received by the remote peer in a different set of
+      chunks than it was sent as.
+    </para>
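+
+    <para>
+      The seal requirement described for
+      <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant> above can be satisfied
+      with the standard
+      <citerefentry>
+        <refentrytitle>memfd_create</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      and
+      <citerefentry>
+        <refentrytitle>fcntl</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      interfaces. The following non-normative sketch prepares such a file;
+      error handling is omitted.
+    </para>
+
+    <programlisting><![CDATA[
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+/* Create a memfd carrying 'len' bytes of 'data' and seal it so that the
+ * content can no longer be altered. The returned file descriptor can then
+ * be referenced by a KDBUS_ITEM_PAYLOAD_MEMFD item. */
+int make_sealed_payload(const void *data, size_t len)
+{
+        int fd = memfd_create("kdbus-payload", MFD_ALLOW_SEALING);
+
+        write(fd, data, len);
+
+        fcntl(fd, F_ADD_SEALS,
+              F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+
+        return fd;
+}
+    ]]></programlisting>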
+  </refsect1>
+
+  <refsect1>
+    <title>Sending messages</title>
+
+    <para>
+      Messages are passed to the kernel with the
+      <constant>KDBUS_CMD_SEND</constant> ioctl. Depending on the destination
+      address of the message, the kernel delivers the message to the specific
+      destination connection, or to some subset of all connections on the same
+      bus. Sending messages across buses is not possible. Messages are always
+      queued in the memory pool of the destination connection (see above).
+    </para>
+
+    <para>
+      The <constant>KDBUS_CMD_SEND</constant> ioctl uses a
+      <type>struct kdbus_cmd_send</type> to describe the message
+      transfer.
+    </para>
+    <programlisting>
+struct kdbus_cmd_send {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __u64 msg_address;
+  struct kdbus_msg_info reply;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>Flags for message delivery</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_SEND_SYNC_REPLY</constant></term>
+              <listitem>
+                <para>
+                  By default, all calls to kdbus are considered asynchronous,
+                  non-blocking. However, as there are many use cases that need
+                  to wait for a remote peer to answer a method call, there's a
+                  way to send a message and wait for a reply in a synchronous
+                  fashion. This is what the
+                  <constant>KDBUS_SEND_SYNC_REPLY</constant> controls. The
+                  <constant>KDBUS_CMD_SEND</constant> ioctl will block until
+                  the reply has arrived, the timeout limit is reached, the
+                  remote connection is shut down, or the call is interrupted
+                  by a signal before any reply arrives; see
+                  <citerefentry>
+                    <refentrytitle>signal</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>.
+
+                  The offset of the reply message in the sender's pool is stored
+                  in <varname>reply</varname> when the ioctl has returned without
+                  error. Hence, there is no need for another
+                  <constant>KDBUS_CMD_RECV</constant> ioctl or anything else to
+                  receive the reply.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Request a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will fail with
+                  <errorcode>-1</errorcode>, and <varname>errno</varname>
+                  will be set to <constant>EPROTO</constant>.
+                  Once the ioctl has returned, the <varname>flags</varname>
+                  field will have all bits set that the kernel recognizes as
+                  valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>msg_address</varname></term>
+        <listitem><para>
+          In this field, users have to provide a pointer to a message
+          (<type>struct kdbus_msg</type>) to send. See below for a
+          detailed description.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>reply</varname></term>
+        <listitem><para>
+          Only used for synchronous replies. See description of
+          <type>struct kdbus_cmd_recv</type> for more details.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            The following items are currently recognized.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_CANCEL_FD</constant></term>
+              <listitem>
+                <para>
+                  When this optional item is passed in, and the call is
+                  executed synchronously (see
+                  <constant>KDBUS_SEND_SYNC_REPLY</constant>), the passed-in
+                  file descriptor can be used as an alternative cancellation
+                  point. The kernel will call
+                  <citerefentry>
+                    <refentrytitle>poll</refentrytitle>
+                    <manvolnum>2</manvolnum>
+                  </citerefentry>
+                  on this file descriptor, and once it reports any incoming
+                  bytes, the blocking send operation will be canceled; the
+                  blocking, synchronous ioctl call will return
+                  <errorcode>-1</errorcode>, and <varname>errno</varname> will
+                  be set to <errorname>ECANCELED</errorname>.
+                  Any type of file descriptor on which
+                  <citerefentry>
+                    <refentrytitle>poll</refentrytitle>
+                    <manvolnum>2</manvolnum>
+                  </citerefentry>
+                  can be called can be used as payload to this item; for
+                  example, an eventfd can be used for this purpose, see
+                  <citerefentry>
+                    <refentrytitle>eventfd</refentrytitle>
+                    <manvolnum>2</manvolnum>
+                  </citerefentry>.
+                  For asynchronous message sending, this item is allowed but
+                  ignored.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The message referenced by the <varname>msg_address</varname> above has
+      the following layout.
+    </para>
+
+    <programlisting>
+struct kdbus_msg {
+  __u64 size;
+  __u64 flags;
+  __s64 priority;
+  __u64 dst_id;
+  __u64 src_id;
+  __u64 payload_type;
+  __u64 cookie;
+  __u64 timeout_ns;
+  __u64 cookie_reply;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>Flags to describe message details.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_MSG_EXPECT_REPLY</constant></term>
+              <listitem>
+                <para>
+                  Expect a reply to this message from the remote peer. With
+                  this bit set, the timeout_ns field must be set to a non-zero
+                  number of nanoseconds in which the receiving peer is expected
+                  to reply. If such a reply is not received in time, the sender
+                  will be notified with a timeout message (see below). The
+                  value must be an absolute value, in nanoseconds and based on
+                  <constant>CLOCK_MONOTONIC</constant>.
+                </para><para>
+                  For a message to be accepted as reply, it must be a direct
+                  message to the original sender (not a broadcast and not a
+                  signal message), and its
+                  <varname>kdbus_msg.cookie_reply</varname> must match the
+                  previous message's <varname>kdbus_msg.cookie</varname>.
+                </para><para>
+                  Expected replies also temporarily open the policy of the
+                  sending connection, so the other peer is allowed to respond
+                  within the given time window.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_MSG_NO_AUTO_START</constant></term>
+              <listitem>
+                <para>
+                  By default, when a message is sent to an activator
+                  connection, the activator is notified and will start an
+                  implementer. This flag inhibits that behavior. With this bit
+                  set, and the remote being an activator, the ioctl will fail
+                  with <varname>errno</varname> set to
+                  <constant>EADDRNOTAVAIL</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>priority</varname></term>
+        <listitem><para>
+          The priority of this message. Receiving messages (see below) may
+          optionally be constrained to messages of a minimal priority. This
+          allows for use cases where timing critical data is interleaved with
+          control data on the same connection. If unused, the priority field
+          should be set to <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>dst_id</varname></term>
+        <listitem><para>
+          The numeric ID of the destination connection, or
+          <constant>KDBUS_DST_ID_BROADCAST</constant>
+          (~0ULL) to address every peer on the bus, or
+          <constant>KDBUS_DST_ID_NAME</constant> (0) to look
+          it up dynamically from the bus' name registry.
+          In the latter case, an item of type
+          <constant>KDBUS_ITEM_DST_NAME</constant> is mandatory.
+          Also see
+          <citerefentry>
+            <refentrytitle>kdbus.name</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>src_id</varname></term>
+        <listitem><para>
+          Upon return of the ioctl, this member will contain the sending
+          connection's numerical ID. It should be set to 0 at send time.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>payload_type</varname></term>
+        <listitem><para>
+          Type of the payload in the actual data records. Currently, only
+          <constant>KDBUS_PAYLOAD_DBUS</constant> is accepted as input value
+          of this field. When receiving messages that are generated by the
+          kernel (notifications), this field will contain
+          <constant>KDBUS_PAYLOAD_KERNEL</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>cookie</varname></term>
+        <listitem><para>
+          Cookie of this message, for later recognition. Also, when replying
+          to a message (see above), the <varname>cookie_reply</varname>
+          field must match this value.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>timeout_ns</varname></term>
+        <listitem><para>
+          If the message sent requires a reply from the remote peer (see above),
+          this field contains the timeout in absolute nanoseconds based on
+          <constant>CLOCK_MONOTONIC</constant>. Also see
+          <citerefentry>
+            <refentrytitle>clock_gettime</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>cookie_reply</varname></term>
+        <listitem><para>
+          If the message sent is a reply to another message, this field must
+          match the cookie of the formerly received message.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            A dynamically sized list of items to contain additional information.
+            The following items are expected/valid:
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+              <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+              <term><constant>KDBUS_ITEM_FDS</constant></term>
+              <listitem>
+                <para>
+                  Actual data records containing the payload. See section
+                  "Message payload".
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_BLOOM_FILTER</constant></term>
+              <listitem>
+                <para>
+                  Bloom filter for matches (see below).
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_ITEM_DST_NAME</constant></term>
+              <listitem>
+                <para>
+                  Well-known name to send this message to. Required if
+                  <varname>dst_id</varname> is set to
+                  <constant>KDBUS_DST_ID_NAME</constant>.
+                  If a connection holding the given name can't be found,
+                  the ioctl will fail with <varname>errno</varname> set to
+                  <constant>ESRCH</constant>.
+                </para>
+                <para>
+                  For messages to a unique name (ID), this item is optional. If
+                  present, the kernel will make sure the name owner matches the
+                  given unique name. This allows programs to tie the message
+                  sending to the condition that a name is currently owned by a
+                  certain unique name.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The message will be augmented by the requested metadata items when
+      queued into the receiver's pool. See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      and
+      <citerefentry>
+        <refentrytitle>kdbus.item</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information on metadata.
+    </para>
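+
+    <para>
+      The following non-normative sketch assembles a minimal message carrying
+      one <constant>KDBUS_ITEM_PAYLOAD_VEC</constant> item and passes it to
+      <constant>KDBUS_CMD_SEND</constant>. It assumes a connected endpoint
+      file descriptor <varname>conn_fd</varname> and a destination unique ID
+      <varname>dst</varname>; the payload is an arbitrary blob (a real client
+      would place marshalled D-Bus data here), and error handling is omitted.
+    </para>
+
+    <programlisting><![CDATA[
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+int send_blob(int conn_fd, uint64_t dst)
+{
+        static const char payload[] = "hello";
+        struct {
+                struct kdbus_msg msg;
+                struct {
+                        uint64_t size;          /* item header */
+                        uint64_t type;
+                        struct kdbus_vec vec;   /* points at sender memory */
+                } item;
+        } m;
+        struct kdbus_cmd_send cmd;
+
+        memset(&m, 0, sizeof(m));
+        m.msg.size = sizeof(m);
+        m.msg.dst_id = dst;                     /* unique ID of the receiver */
+        m.msg.payload_type = KDBUS_PAYLOAD_DBUS;
+        m.msg.cookie = 1;                       /* chosen by the sender */
+
+        m.item.size = sizeof(m.item);
+        m.item.type = KDBUS_ITEM_PAYLOAD_VEC;
+        m.item.vec.address = (uintptr_t)payload;
+        m.item.vec.size = sizeof(payload);
+
+        memset(&cmd, 0, sizeof(cmd));
+        cmd.size = sizeof(cmd);
+        cmd.msg_address = (uintptr_t)&m;
+
+        return ioctl(conn_fd, KDBUS_CMD_SEND, &cmd);
+}
+    ]]></programlisting>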
+  </refsect1>
+
+  <refsect1>
+    <title>Receiving messages</title>
+
+    <para>
+      Messages are received by the client with the
+      <constant>KDBUS_CMD_RECV</constant> ioctl. The endpoint file of the bus
+      supports <function>poll()/epoll()/select()</function>; when new messages
+      are available on the connection's file descriptor,
+      <constant>POLLIN</constant> is reported. For compatibility reasons,
+      <constant>POLLOUT</constant> is always reported as well. Note, however,
+      that the latter does not guarantee that a message can in fact be sent, as
+      this depends on how many pending messages the receiver has in its pool.
+    </para>
+
+    <para>
+      With the <constant>KDBUS_CMD_RECV</constant> ioctl, a
+      <type>struct kdbus_cmd_recv</type> is used.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_recv {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __s64 priority;
+  __u64 dropped_msgs;
+  struct kdbus_msg_info msg;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>Flags to control the receive command.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_RECV_PEEK</constant></term>
+              <listitem>
+                <para>
+                  Just return the location of the next message. Do not install
+                  file descriptors or anything else. This is usually used to
+                  determine the sender of the next queued message.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_RECV_DROP</constant></term>
+              <listitem>
+                <para>
+                  Drop the next message without doing anything else with it,
+                  and free the pool slice. This is a shortcut for
+                  <constant>KDBUS_RECV_PEEK</constant> and
+                  <constant>KDBUS_CMD_FREE</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_RECV_USE_PRIORITY</constant></term>
+              <listitem>
+                <para>
+                  Dequeue messages ordered by their priority, filtering them
+                  by the <varname>priority</varname> field (see below).
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Request a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will fail with
+                  <errorcode>-1</errorcode>, and <varname>errno</varname>
+                  will be set to <constant>EPROTO</constant>.
+                  Once the ioctl has returned, the <varname>flags</varname>
+                  field will have all bits set that the kernel recognizes as
+                  valid for this command.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. If the <varname>dropped_msgs</varname>
+          field is non-zero, <constant>KDBUS_RECV_RETURN_DROPPED_MSGS</constant>
+          is set. If a file descriptor could not be installed, the
+          <constant>KDBUS_RECV_RETURN_INCOMPLETE_FDS</constant> flag is set.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>priority</varname></term>
+        <listitem><para>
+          With <constant>KDBUS_RECV_USE_PRIORITY</constant> set in
+          <varname>flags</varname>, messages will be dequeued ordered by their
+          priority, starting with the highest value. Also, messages will be
+          filtered by the value given in this field, so the returned message
+          will at least have the requested priority. If no such message is
+          waiting in the queue, the ioctl will fail, and
+          <varname>errno</varname> will be set to <constant>EAGAIN</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>dropped_msgs</varname></term>
+        <listitem><para>
+          Whenever a message with <constant>KDBUS_MSG_SIGNAL</constant> is sent
+          but cannot be queued on a peer (e.g., because it contains file
+          descriptors but the peer does not accept them, or because there is no
+          space left in the peer's pool), the peer's
+          <varname>dropped_msgs</varname> counter is incremented. On the next
+          <constant>KDBUS_CMD_RECV</constant> ioctl, that counter is copied
+          into the ioctl struct and cleared on the peer. If it was non-zero,
+          the <constant>KDBUS_RECV_RETURN_DROPPED_MSGS</constant> flag will be
+          set in <varname>return_flags</varname>. Note that this will only
+          happen if the ioctl succeeded or failed with
+          <constant>EAGAIN</constant>. In other error cases, the
+          <varname>dropped_msgs</varname> field of the peer is left untouched.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>msg</varname></term>
+        <listitem><para>
+          Embedded struct containing information on the received message when
+          this command succeeded (see below).
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem><para>
+          Items to specify further details for the receive command.
+          Currently unused, and all items will be rejected with
+          <varname>errno</varname> set to <constant>EINVAL</constant>.
+        </para></listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Both <type>struct kdbus_cmd_recv</type> and
+      <type>struct kdbus_cmd_send</type> embed
+      <type>struct kdbus_msg_info</type>.
+      For the <constant>KDBUS_CMD_SEND</constant> ioctl, it is used to catch
+      synchronous replies, if one was requested, and is unused otherwise.
+    </para>
+
+    <programlisting>
+struct kdbus_msg_info {
+  __u64 offset;
+  __u64 msg_size;
+  __u64 return_flags;
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>offset</varname></term>
+        <listitem><para>
+          Upon return of the ioctl, this field contains the offset in the
+          receiver's memory pool. The memory must be freed with
+          <constant>KDBUS_CMD_FREE</constant>. See
+          <citerefentry>
+            <refentrytitle>kdbus.pool</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for further details.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>msg_size</varname></term>
+        <listitem><para>
+          Upon successful return of the ioctl, this field contains the size of
+          the allocated slice at offset <varname>offset</varname>.
+          It is the combination of the size of the stored
+          <type>struct kdbus_msg</type> object plus all appended VECs.
+          You can use it in combination with <varname>offset</varname> to map
+          a single message, instead of mapping the entire pool. See
+          <citerefentry>
+            <refentrytitle>kdbus.pool</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for further details.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem>
+          <para>
+            Kernel-provided return flags. Currently, the following flags are
+            defined.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_RECV_RETURN_INCOMPLETE_FDS</constant></term>
+              <listitem>
+                <para>
+                  The message contained memfds or file descriptors, and the
+                  kernel failed to install one or more of them at receive time.
+                  This most probably happened because the maximum number of
+                  file descriptors for the receiver's task was exceeded.
+                  In such cases, the message is still delivered, so this is not
+                  a fatal condition. File descriptor numbers inside the
+                  <constant>KDBUS_ITEM_FDS</constant> item or memfd files
+                  referenced by <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant>
+                  items which could not be installed will be set to
+                  <constant>-1</constant>.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Unless <constant>KDBUS_RECV_DROP</constant> was passed, the
+      <varname>offset</varname> field contains the location of the new message
+      inside the receiver's pool after the <constant>KDBUS_CMD_RECV</constant>
+      ioctl was employed. The message is stored as <type>struct kdbus_msg</type>
+      at this offset, and can be interpreted with the semantics described above.
+    </para>
+    <para>
+      Also, if the connection allowed file descriptors to be passed
+      (<constant>KDBUS_HELLO_ACCEPT_FD</constant>), and if the message contained
+      any, they will be installed into the receiving process when the
+      <constant>KDBUS_CMD_RECV</constant> ioctl is called.
+      <emphasis>memfds</emphasis> may always be part of the message payload.
+      The receiving task is obliged to close all file descriptors appropriately
+      once no longer needed. If <constant>KDBUS_RECV_PEEK</constant> is set, no
+      file descriptors are installed. This allows for peeking at a message,
+      looking at its metadata only and dropping it via
+      <constant>KDBUS_RECV_DROP</constant>, without installing any of the file
+      descriptors into the receiving process.
+    </para>
+    <para>
+      The caller is obliged to call the <constant>KDBUS_CMD_FREE</constant>
+      ioctl with the returned offset when the memory is no longer needed.
+    </para>
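+
+    <para>
+      A minimal sketch of that receive-and-free sequence is shown below. It
+      assumes a connection file descriptor <varname>conn_fd</varname> whose
+      pool has already been mapped to <varname>buf</varname>, and it relies on
+      the <type>struct kdbus_cmd_recv</type> and
+      <type>struct kdbus_cmd_free</type> layouts from the kdbus UAPI header.
+      Includes and error handling are omitted.
+    </para>
+    <programlisting>
+struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+
+if (ioctl(conn_fd, KDBUS_CMD_RECV, &amp;recv) == 0) {
+  /* The message is stored at the returned offset inside the pool. */
+  struct kdbus_msg *msg = (struct kdbus_msg *)(buf + recv.msg.offset);
+
+  /* ... interpret msg and its items here ... */
+
+  /* Release the slice so the pool does not fill up. */
+  struct kdbus_cmd_free cmd_free = {
+    .size = sizeof(cmd_free),
+    .offset = recv.msg.offset,
+  };
+  ioctl(conn_fd, KDBUS_CMD_FREE, &amp;cmd_free);
+}
+    </programlisting>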
+  </refsect1>
+
+  <refsect1>
+    <title>Notifications</title>
+    <para>
+      A kernel notification is a regular kdbus message with the following
+      details.
+    </para>
+
+    <itemizedlist>
+      <listitem><para>
+          kdbus_msg.src_id == <constant>KDBUS_SRC_ID_KERNEL</constant>
+      </para></listitem>
+      <listitem><para>
+        kdbus_msg.dst_id == <constant>KDBUS_DST_ID_BROADCAST</constant>
+      </para></listitem>
+      <listitem><para>
+        kdbus_msg.payload_type == <constant>KDBUS_PAYLOAD_KERNEL</constant>
+      </para></listitem>
+      <listitem><para>
+        Has exactly one of the items described below attached.
+      </para></listitem>
+      <listitem><para>
+        Always has a timestamp item (<constant>KDBUS_ITEM_TIMESTAMP</constant>)
+        attached.
+      </para></listitem>
+    </itemizedlist>
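+
+    <para>
+      As a minimal sketch, a received message (assumed here to be available as
+      <varname>msg</varname>, pointing to a <type>struct kdbus_msg</type> in
+      the receiver's pool) can be identified as a kernel notification by
+      checking exactly these fields:
+    </para>
+    <programlisting>
+if (msg->src_id == KDBUS_SRC_ID_KERNEL &amp;&amp;
+    msg->payload_type == KDBUS_PAYLOAD_KERNEL) {
+  /*
+   * Kernel notification: walk msg->items to find the single event item,
+   * e.g. KDBUS_ITEM_ID_ADD or KDBUS_ITEM_NAME_CHANGE.
+   */
+}
+    </programlisting>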
+
+    <para>
+      The kernel will notify its users of the following events.
+    </para>
+
+    <itemizedlist>
+      <listitem><para>
+        When connection <emphasis>A</emphasis> is terminated while connection
+        <emphasis>B</emphasis> is waiting for a reply from it, connection
+        <emphasis>B</emphasis> is notified with a message with an item of
+        type <constant>KDBUS_ITEM_REPLY_DEAD</constant>.
+      </para></listitem>
+
+      <listitem><para>
+        When connection <emphasis>A</emphasis> does not receive a reply from
+        connection <emphasis>B</emphasis> within the specified timeout window,
+        connection <emphasis>A</emphasis> will receive a message with an
+        item of type <constant>KDBUS_ITEM_REPLY_TIMEOUT</constant>.
+      </para></listitem>
+
+      <listitem><para>
+        When an ordinary connection (not a monitor) is created on or removed
+        from a bus, messages with an item of type
+        <constant>KDBUS_ITEM_ID_ADD</constant> or
+        <constant>KDBUS_ITEM_ID_REMOVE</constant>, respectively, are delivered
+        to all bus members that match these messages through their match
+        database. Eavesdroppers (monitor connections) do not cause such
+        notifications to be sent. They are invisible on the bus.
+      </para></listitem>
+
+      <listitem><para>
+        When a connection gains or loses ownership of a name, messages with an
+        item of type <constant>KDBUS_ITEM_NAME_ADD</constant>,
+        <constant>KDBUS_ITEM_NAME_REMOVE</constant> or
+        <constant>KDBUS_ITEM_NAME_CHANGE</constant> are delivered to all bus
+        members that match these messages through their match database.
+      </para></listitem>
+    </itemizedlist>
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_SEND</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EOPNOTSUPP</constant></term>
+          <listitem><para>
+            The connection is not an ordinary connection, or the passed
+            file descriptors in <constant>KDBUS_ITEM_FDS</constant> item are
+            either kdbus handles or unix domain sockets. Both are currently
+            unsupported.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The submitted payload type is
+            <constant>KDBUS_PAYLOAD_KERNEL</constant>,
+            <constant>KDBUS_MSG_EXPECT_REPLY</constant> was set without timeout
+            or cookie values, <constant>KDBUS_SEND_SYNC_REPLY</constant> was
+            set without <constant>KDBUS_MSG_EXPECT_REPLY</constant>, an invalid
+            item was supplied, <constant>src_id</constant> was non-zero and was
+            different from the current connection's ID, a supplied memfd had a
+            size of 0, or a string was not properly null-terminated.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ENOTUNIQ</constant></term>
+          <listitem><para>
+            The supplied destination is
+            <constant>KDBUS_DST_ID_BROADCAST</constant> and either
+            file descriptors were passed, or
+            <constant>KDBUS_MSG_EXPECT_REPLY</constant> was set,
+            or a timeout was given.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>E2BIG</constant></term>
+          <listitem><para>
+            Too many items.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMSGSIZE</constant></term>
+          <listitem><para>
+            The size of the message header and items or the payload vector
+            is excessive.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EEXIST</constant></term>
+          <listitem><para>
+            Multiple <constant>KDBUS_ITEM_FDS</constant>,
+            <constant>KDBUS_ITEM_BLOOM_FILTER</constant> or
+            <constant>KDBUS_ITEM_DST_NAME</constant> items were supplied.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EBADF</constant></term>
+          <listitem><para>
+            The supplied <constant>KDBUS_ITEM_FDS</constant> or
+            <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant> items
+            contained an illegal file descriptor.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMEDIUMTYPE</constant></term>
+          <listitem><para>
+            The supplied memfd is not a sealed kdbus memfd.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EMFILE</constant></term>
+          <listitem><para>
+            Too many file descriptors inside a
+            <constant>KDBUS_ITEM_FDS</constant>.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EBADMSG</constant></term>
+          <listitem><para>
+            An item had an illegal size, both a <constant>dst_id</constant>
+            and a <constant>KDBUS_ITEM_DST_NAME</constant> were given, or both
+            a name and a bloom filter were given.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ETXTBSY</constant></term>
+          <listitem><para>
+            The supplied kdbus memfd file cannot be sealed or the seal
+            was removed, because it is shared with other processes or
+            still mapped with
+            <citerefentry>
+              <refentrytitle>mmap</refentrytitle>
+              <manvolnum>2</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ECOMM</constant></term>
+          <listitem><para>
+            A peer does not accept the file descriptors addressed to it.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EFAULT</constant></term>
+          <listitem><para>
+            The supplied bloom filter size was not 64-bit aligned, or supplied
+            memory could not be accessed by the kernel.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EDOM</constant></term>
+          <listitem><para>
+            The supplied bloom filter size did not match the bloom filter
+            size of the bus.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EDESTADDRREQ</constant></term>
+          <listitem><para>
+            <constant>dst_id</constant> was set to
+            <constant>KDBUS_DST_ID_NAME</constant>, but no
+            <constant>KDBUS_ITEM_DST_NAME</constant> was attached.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ESRCH</constant></term>
+          <listitem><para>
+            The name to look up was not found in the name registry.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EADDRNOTAVAIL</constant></term>
+          <listitem><para>
+            <constant>KDBUS_MSG_NO_AUTO_START</constant> was given but the
+            destination connection is an activator.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ENXIO</constant></term>
+          <listitem><para>
+            The passed numeric destination connection ID couldn't be found,
+            or is not connected.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ECONNRESET</constant></term>
+          <listitem><para>
+            The destination connection is no longer active.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ETIMEDOUT</constant></term>
+          <listitem><para>
+            Timeout while synchronously waiting for a reply.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINTR</constant></term>
+          <listitem><para>
+            Interrupted system call while synchronously waiting for a reply.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EPIPE</constant></term>
+          <listitem><para>
+            When sending a message, a synchronous reply from the receiving
+            connection was expected but the connection died before answering.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ENOBUFS</constant></term>
+          <listitem><para>
+            Too many pending messages on the receiver side.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EREMCHG</constant></term>
+          <listitem><para>
+            Both a well-known name and a unique name (ID) were given, but
+            the name is not currently owned by that connection.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EXFULL</constant></term>
+          <listitem><para>
+            The memory pool of the receiver is full.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EREMOTEIO</constant></term>
+          <listitem><para>
+            While synchronously waiting for a reply, the remote peer
+            failed with an I/O error.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_RECV</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EOPNOTSUPP</constant></term>
+          <listitem><para>
+            The connection is not an ordinary connection, or the passed
+            file descriptors are either kdbus handles or unix domain
+            sockets. Both are currently unsupported.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Invalid flags or offset.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EAGAIN</constant></term>
+          <listitem><para>
+            No message found in the queue.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>clock_gettime</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>ioctl</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>poll</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>select</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>epoll</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>eventfd</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>memfd_create</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.name.xml linux-4.2-pck/Documentation/kdbus/kdbus.name.xml
--- linux-4.2/Documentation/kdbus/kdbus.name.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.name.xml	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,711 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.name">
+
+  <refentryinfo>
+    <title>kdbus.name</title>
+    <productname>kdbus.name</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.name</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.name</refname>
+    <refpurpose>kdbus.name</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Each
+      <citerefentry>
+        <refentrytitle>kdbus.bus</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      instantiates a name registry to resolve well-known names into unique
+      connection IDs for message delivery. The registry will be queried when a
+      message is sent with <varname>kdbus_msg.dst_id</varname> set to
+      <constant>KDBUS_DST_ID_NAME</constant>, or when a registry dump is
+      requested with <constant>KDBUS_CMD_NAME_LIST</constant>.
+    </para>
+
+    <para>
+      All of the below is subject to policy rules for <emphasis>SEE</emphasis>
+      and <emphasis>OWN</emphasis> permissions. See
+      <citerefentry>
+        <refentrytitle>kdbus.policy</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Name validity</title>
+    <para>
+      A name has to comply with the following rules in order to be considered
+      valid.
+    </para>
+
+    <itemizedlist>
+      <listitem>
+        <para>
+          The name has two or more elements separated by a
+          '<literal>.</literal>' (period) character.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          All elements must contain at least one character.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          Each element must only contain the ASCII characters
+          <literal>[A-Z][a-z][0-9]_</literal> and must not begin with a
+          digit.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          The name must contain at least one '<literal>.</literal>' (period)
+          character (and thus at least two elements).
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          The name must not begin with a '<literal>.</literal>' (period)
+          character.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          The name must not exceed <constant>255</constant> characters in
+          length.
+        </para>
+      </listitem>
+    </itemizedlist>
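+
+    <para>
+      For example, <literal>org.freedesktop.DBus</literal> is a valid name,
+      whereas <literal>org</literal> (only one element),
+      <literal>.org.foo</literal> (leading period) and
+      <literal>org..foo</literal> (empty element) are not.
+    </para>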
+  </refsect1>
+
+  <refsect1>
+    <title>Acquiring a name</title>
+    <para>
+      To acquire a name, a client uses the
+      <constant>KDBUS_CMD_NAME_ACQUIRE</constant> ioctl with
+      <type>struct kdbus_cmd</type> as argument.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>Flags to control details in the name acquisition.</para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_NAME_REPLACE_EXISTING</constant></term>
+              <listitem>
+                <para>
+                  Acquiring a name that is already present usually fails,
+                  unless this flag is set in the call, and
+                  <constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant> (see below)
+                  was set when the current owner of the name acquired it, or
+                  if the current owner is an activator connection (see
+                  <citerefentry>
+                    <refentrytitle>kdbus.connection</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>).
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant></term>
+              <listitem>
+                <para>
+                  Allow other connections to take over this name. When this
+                  happens, the former owner of the name will be notified
+                  of the loss.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_NAME_QUEUE</constant></term>
+              <listitem>
+                <para>
+                  A name that is already acquired by a connection can not be
+                  acquired again (unless the
+                  <constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant> flag was
+                  set during acquisition; see above).
+                  However, a connection can put itself in a queue of
+                  connections waiting for the name to be released. Once that
+                  happens, the first connection in that queue becomes the new
+                  owner and is notified accordingly.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Request a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will fail with
+                  <errorcode>-1</errorcode>, and <varname>errno</varname>
+                  is set to <constant>EPROTO</constant>.
+                  Once the ioctl returned, the <varname>flags</varname>
+                  field will have all bits set that the kernel recognizes as
+                  valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem>
+          <para>
+            Flags returned by the kernel. Currently, the following may be
+            returned by the kernel.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_NAME_IN_QUEUE</constant></term>
+              <listitem>
+                <para>
+                  The name was not acquired yet, but the connection was
+                  placed in the queue of peers waiting for the name.
+                  This can only happen if <constant>KDBUS_NAME_QUEUE</constant>
+                  was set in the <varname>flags</varname> member (see above).
+                  The connection will receive a name owner change notification
+                  once the current owner has given up the name and its
+                  ownership was transferred.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Items to submit the name. Currently, one item of type
+            <constant>KDBUS_ITEM_NAME</constant> is expected and allowed, and
+            the contained string must be a valid bus name.
+            <constant>KDBUS_ITEM_NEGOTIATE</constant> may be used to probe for
+            valid item types. See
+            <citerefentry>
+              <refentrytitle>kdbus.item</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for a detailed description of how this item is used.
+          </para>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
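+
+    <para>
+      A minimal sketch of acquiring a name is shown below, using the
+      illustrative name <literal>org.example.foo</literal>. It assumes a
+      connection file descriptor <varname>conn_fd</varname> and computes the
+      item header size by hand; includes and error handling are omitted.
+    </para>
+    <programlisting>
+const char *name = "org.example.foo";
+size_t item_size = offsetof(struct kdbus_item, str) + strlen(name) + 1;
+/* Items are 8-byte aligned; pad the overall command size accordingly. */
+size_t size = sizeof(struct kdbus_cmd) + ((item_size + 7) &amp; ~7UL);
+struct kdbus_cmd *cmd = calloc(1, size);
+
+cmd->size = size;
+cmd->flags = KDBUS_NAME_ALLOW_REPLACEMENT;
+cmd->items[0].size = item_size;
+cmd->items[0].type = KDBUS_ITEM_NAME;
+strcpy(cmd->items[0].str, name);
+
+ioctl(conn_fd, KDBUS_CMD_NAME_ACQUIRE, cmd);
+free(cmd);
+    </programlisting>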
+  </refsect1>
+
+  <refsect1>
+    <title>Releasing a name</title>
+    <para>
+      A connection may release a name explicitly with the
+      <constant>KDBUS_CMD_NAME_RELEASE</constant> ioctl. If the connection was
+      an implementer of an activatable name, its pending messages are moved
+      back to the activator. If there are any connections queued up as waiters
+      for the name, the first one in the queue (the oldest entry) will become
+      the new owner. The same happens implicitly for all names once a
+      connection terminates. See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information on connections.
+    </para>
+    <para>
+      The <constant>KDBUS_CMD_NAME_RELEASE</constant> ioctl uses the same data
+      structure as the acquisition call
+      (<constant>KDBUS_CMD_NAME_ACQUIRE</constant>),
+      but with slightly different field usage.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Flags to the command. Currently unused.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+          and the <varname>flags</varname> field is set to
+          <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Items to submit the name. Currently, one item of type
+            <constant>KDBUS_ITEM_NAME</constant> is expected and allowed, and
+            the contained string must be a valid bus name.
+            <constant>KDBUS_ITEM_NEGOTIATE</constant> may be used to probe for
+            valid item types. See
+            <citerefentry>
+              <refentrytitle>kdbus.item</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+            for a detailed description of how this item is used.
+          </para>
+          <para>
+            Unrecognized items are rejected, and the ioctl will fail with
+            <varname>errno</varname> set to <constant>EINVAL</constant>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Dumping the name registry</title>
+    <para>
+      A connection may request a complete or filtered dump of currently active
+      bus names with the <constant>KDBUS_CMD_LIST</constant> ioctl, which
+      takes a <type>struct kdbus_cmd_list</type> as argument.
+    </para>
+
+    <programlisting>
+struct kdbus_cmd_list {
+  __u64 flags;
+  __u64 return_flags;
+  __u64 offset;
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem>
+          <para>
+            Any combination of flags to specify which names should be dumped.
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_LIST_UNIQUE</constant></term>
+              <listitem>
+                <para>
+                  List the unique (numeric) IDs of connections, whether they
+                  own a name or not.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_LIST_NAMES</constant></term>
+              <listitem>
+                <para>
+                  List well-known names stored in the database which are
+                  actively owned by a real connection (not an activator).
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_LIST_ACTIVATORS</constant></term>
+              <listitem>
+                <para>
+                  List names that are owned by an activator.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_LIST_QUEUED</constant></term>
+              <listitem>
+                <para>
+                  List connections that do not yet own a name but are
+                  waiting for it to become available.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Request a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will fail with
+                  <errorcode>-1</errorcode>, and <varname>errno</varname>
+                  is set to <constant>EPROTO</constant>.
+                  Once the ioctl returned, the <varname>flags</varname>
+                  field will have all bits set that the kernel recognizes as
+                  valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>offset</varname></term>
+        <listitem><para>
+          When the ioctl returns successfully, the offset to the name registry
+          dump inside the connection's pool will be stored in this field.
+        </para></listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The returned list of names is stored in a <type>struct kdbus_list</type>
+      that in turn contains an array of type <type>struct kdbus_info</type>.
+      The array size in bytes is given as <varname>list_size</varname>.
+      The fields inside <type>struct kdbus_info</type> are described next.
+    </para>
+
+    <programlisting>
+struct kdbus_info {
+  __u64 size;
+  __u64 id;
+  __u64 flags;
+  struct kdbus_item items[0];
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+          The owning connection's unique ID.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          The flags of the owning connection.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem>
+          <para>
+            Items containing the actual name. Currently, one item of type
+            <constant>KDBUS_ITEM_OWNED_NAME</constant> will be attached,
+            including the name's flags. In that item, the flags field of the
+            name may carry the following bits:
+          </para>
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant></term>
+              <listitem>
+                <para>
+                  Other connections are allowed to take over this name from the
+                  connection that owns it.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_NAME_IN_QUEUE</constant></term>
+              <listitem>
+                <para>
+                  When retrieving a list of currently acquired names in the
+                  registry, this flag indicates whether the connection
+                  actually owns the name or is currently waiting for it to
+                  become available.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_NAME_ACTIVATOR</constant></term>
+              <listitem>
+                <para>
+                  An activator connection owns a name as a placeholder for an
+                  implementer, which is started on demand by programs as soon
+                  as the first message arrives. There's some more information
+                  on this topic in
+                  <citerefentry>
+                    <refentrytitle>kdbus.connection</refentrytitle>
+                    <manvolnum>7</manvolnum>
+                  </citerefentry>
+                  .
+                </para>
+                <para>
+                  In contrast to
+                  <constant>KDBUS_NAME_REPLACE_EXISTING</constant>,
+                  when a name is taken over from an activator connection, all
+                  the messages that have been queued in the activator
+                  connection will be moved over to the new owner. The activator
+                  connection will still be tracked for the name and will take
+                  control again if the implementer connection terminates.
+                </para>
+                <para>
+                  This flag can not be used when acquiring a name, but is
+                  implicitly set through <constant>KDBUS_CMD_HELLO</constant>
+                  with <constant>KDBUS_HELLO_ACTIVATOR</constant> set in
+                  <varname>kdbus_cmd_hello.conn_flags</varname>.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+              <listitem>
+                <para>
+                  Requests a set of valid flags for this ioctl. When this bit is
+                  set, no action is taken; the ioctl will return
+                  <errorcode>0</errorcode>, and the <varname>flags</varname>
+                  field will have all bits set that are valid for this command.
+                  The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+                  cleared by the operation.
+                </para>
+              </listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      The returned buffer must be freed with the
+      <constant>KDBUS_CMD_FREE</constant> ioctl when the user is finished with
+      it. See
+      <citerefentry>
+        <refentrytitle>kdbus.pool</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_NAME_ACQUIRE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Illegal command flags, illegal name provided, or an activator
+            tried to acquire a second name.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EPERM</constant></term>
+          <listitem><para>
+            Policy prohibited name ownership.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EALREADY</constant></term>
+          <listitem><para>
+            Connection already owns that name.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EEXIST</constant></term>
+          <listitem><para>
+            The name already exists and can not be taken over.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>E2BIG</constant></term>
+          <listitem><para>
+            The maximum number of well-known names per connection is exhausted.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_NAME_RELEASE</constant>
+        may fail with the following errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Invalid command flags, or invalid name provided.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ESRCH</constant></term>
+          <listitem><para>
+            Name is not found in the registry.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EADDRINUSE</constant></term>
+          <listitem><para>
+            Name is owned by a different connection and can't be released.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_LIST</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Invalid command flags.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>ENOBUFS</constant></term>
+          <listitem><para>
+            No available memory in the connection's pool.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.policy</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.policy.xml linux-4.2-pck/Documentation/kdbus/kdbus.policy.xml
--- linux-4.2/Documentation/kdbus/kdbus.policy.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.policy.xml	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,406 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.policy">
+
+  <refentryinfo>
+    <title>kdbus.policy</title>
+    <productname>kdbus.policy</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.policy</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.policy</refname>
+    <refpurpose>kdbus policy</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      A kdbus policy restricts the possibilities of connections to own, see and
+      talk to well-known names. A policy can be associated with a bus (through a
+      policy holder connection) or a custom endpoint. kdbus stores its policy
+      information in a database that can be accessed through the following
+      ioctl commands:
+    </para>
+
+    <variablelist>
+      <varlistentry>
+        <term><constant>KDBUS_CMD_HELLO</constant></term>
+        <listitem><para>
+          When creating, or updating, a policy holder connection. See
+          <citerefentry>
+            <refentrytitle>kdbus.connection</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><constant>KDBUS_CMD_ENDPOINT_MAKE</constant></term>
+        <term><constant>KDBUS_CMD_ENDPOINT_UPDATE</constant></term>
+        <listitem><para>
+          When creating, or updating, a bus custom endpoint. See
+          <citerefentry>
+            <refentrytitle>kdbus.endpoint</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>.
+        </para></listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      In all cases, the name and policy access information is stored in items
+      of type <constant>KDBUS_ITEM_NAME</constant> and
+      <constant>KDBUS_ITEM_POLICY_ACCESS</constant>. For this transport, the
+      following rules apply.
+    </para>
+
+    <itemizedlist>
+      <listitem>
+        <para>
+          An item of type <constant>KDBUS_ITEM_NAME</constant> must be followed
+          by at least one <constant>KDBUS_ITEM_POLICY_ACCESS</constant> item.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          An item of type <constant>KDBUS_ITEM_NAME</constant> can be followed
+          by an arbitrary number of
+          <constant>KDBUS_ITEM_POLICY_ACCESS</constant> items.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          An arbitrary number of groups of names and access levels can be given.
+        </para>
+      </listitem>
+    </itemizedlist>
+
+    <para>
+      Names passed in items of type <constant>KDBUS_ITEM_NAME</constant> must
+      comply with the rules for valid kdbus names. See
+      <citerefentry>
+        <refentrytitle>kdbus.name</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information.
+
+      The payload of an item of type
+      <constant>KDBUS_ITEM_POLICY_ACCESS</constant> is defined by the following
+      struct. For more information on the layout of items, please refer to
+      <citerefentry>
+        <refentrytitle>kdbus.item</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+    </para>
+
+    <programlisting>
+struct kdbus_policy_access {
+  __u64 type;
+  __u64 access;
+  __u64 id;
+};
+    </programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>type</varname></term>
+        <listitem>
+          <para>
+            One of the following.
+          </para>
+
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_ACCESS_USER</constant></term>
+              <listitem><para>
+                Grant access to a user with the UID stored in the
+                <varname>id</varname> field.
+              </para></listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_ACCESS_GROUP</constant></term>
+              <listitem><para>
+                Grant access to a group with the GID stored in the
+                <varname>id</varname> field.
+              </para></listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_ACCESS_WORLD</constant></term>
+              <listitem><para>
+                Grant access to everyone. The <varname>id</varname> field
+                is ignored.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>access</varname></term>
+        <listitem>
+          <para>
+            The access to grant. One of the following.
+          </para>
+
+          <variablelist>
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_SEE</constant></term>
+              <listitem><para>
+                Allow the name to be seen.
+              </para></listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_TALK</constant></term>
+              <listitem><para>
+                Allow the name to be talked to.
+              </para></listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><constant>KDBUS_POLICY_OWN</constant></term>
+              <listitem><para>
+                Allow the name to be owned.
+              </para></listitem>
+            </varlistentry>
+          </variablelist>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>id</varname></term>
+        <listitem><para>
+           For <constant>KDBUS_POLICY_ACCESS_USER</constant>, stores the UID.
+           For <constant>KDBUS_POLICY_ACCESS_GROUP</constant>, stores the GID.
+        </para></listitem>
+      </varlistentry>
+
+    </variablelist>
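+
+    <para>
+      As a minimal sketch, an access entry that grants ownership of a name to
+      UID 1000 would be filled in as follows; the struct is then carried as
+      the payload of a <constant>KDBUS_ITEM_POLICY_ACCESS</constant> item.
+    </para>
+    <programlisting>
+struct kdbus_policy_access access = {
+  .type   = KDBUS_POLICY_ACCESS_USER,
+  .access = KDBUS_POLICY_OWN,
+  .id     = 1000,      /* UID that is granted OWN access */
+};
+    </programlisting>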
+
+    <para>
+      All endpoints of buses have an empty policy database by default.
+      Therefore, unless policy rules are added, all operations will also be
+      denied by default. Also see
+      <citerefentry>
+        <refentrytitle>kdbus.endpoint</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Wildcard names</title>
+    <para>
+      Policy holder connections may upload names that contain the wildcard
+      suffix (<literal>".*"</literal>). Such a policy entry is effective for
+      every well-known name that extends the provided name by exactly one more
+      level.
+
+      For example, the name <literal>foo.bar.*</literal> matches both
+      <literal>foo.bar.baz</literal> and
+      <literal>foo.bar.bazbaz</literal>, but not
+      <literal>foo.bar.baz.baz</literal>.
+
+      This allows connections to take control over multiple names that the
+      policy holder doesn't need to know about when uploading the policy.
+
+      Such wildcard entries are not allowed for custom endpoints.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Privileged connections</title>
+    <para>
+      The policy database is overruled when action is taken by a privileged
+      connection. Please refer to
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information on what makes a connection privileged.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Examples</title>
+    <para>
+      For instance, a set of policy rules may look like this:
+    </para>
+
+    <programlisting>
+KDBUS_ITEM_NAME: str='org.foo.bar'
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=OWN, ID=1000
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=TALK, ID=1001
+KDBUS_ITEM_POLICY_ACCESS: type=WORLD, access=SEE
+
+KDBUS_ITEM_NAME: str='org.blah.baz'
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=OWN, ID=0
+KDBUS_ITEM_POLICY_ACCESS: type=WORLD, access=TALK
+    </programlisting>
+
+    <para>
+      That means that 'org.foo.bar' may only be owned by UID 1000, but every
+      user on the bus is allowed to see the name. However, only UID 1001 may
+      actually send a message to the connection and receive a reply from it.
+
+      The second rule allows 'org.blah.baz' to be owned by UID 0 only, but
+      every user may talk to it.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>TALK access and multiple well-known names per connection</title>
+    <para>
+      Note that TALK access is checked against all names of a connection. For
+      example, if a connection owns both <constant>'org.foo.bar'</constant> and
+      <constant>'org.blah.baz'</constant>, and the policy database allows
+      <constant>'org.blah.baz'</constant> to be talked to by WORLD, then this
+      permission is also granted to <constant>'org.foo.bar'</constant>. That
+      might sound illogical, but after all, we allow messages to be directed to
+      either the ID or a well-known name, and policy is applied to the
+      connection, not the name. In other words, the effective TALK policy for a
+      connection is the most permissive of all names the connection owns.
+
+      For broadcast messages, the receiver needs TALK permissions to the sender
+      to receive the broadcast.
+    </para>
+    <para>
+      Both the endpoint and the bus policy databases are consulted to allow
+      name registry listing, owning a well-known name and message delivery.
+      If either one fails, the operation is failed with
+      <varname>errno</varname> set to <constant>EPERM</constant>.
+
+      For best practices, connections that own names with a restricted TALK
+      access should not install matches. This avoids cases where the sent
+      message may pass the bloom filter due to false-positives and may also
+      satisfy the policy rules.
+
+      Also see
+      <citerefentry>
+        <refentrytitle>kdbus.match</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Implicit policies</title>
+    <para>
+      Depending on the type of the endpoint, a set of implicit rules that
+      override installed policies might be enforced.
+
+      On default endpoints, the following set is enforced and checked before
+      any user-supplied policy is checked.
+    </para>
+
+    <itemizedlist>
+      <listitem>
+        <para>
+          Privileged connections always override any installed policy. Those
+          connections could easily install their own policies, so there is no
+          reason to enforce installed policies.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          Connections can always talk to connections of the same user. This
+          includes broadcast messages.
+        </para>
+      </listitem>
+    </itemizedlist>
+
+    <para>
+      Custom endpoints have stricter policies. The following rules apply:
+    </para>
+
+    <itemizedlist>
+      <listitem>
+        <para>
+          Policy rules are always enforced, even if the connection is a
+          privileged connection.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          Policy rules are always enforced for <constant>TALK</constant> access,
+          even if both ends are running under the same user. This includes
+          broadcast messages.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          To restrict the set of names that can be seen, endpoint policies can
+          install <constant>SEE</constant> policies.
+        </para>
+      </listitem>
+    </itemizedlist>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.pool.xml linux-4.2-pck/Documentation/kdbus/kdbus.pool.xml
--- linux-4.2/Documentation/kdbus/kdbus.pool.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.pool.xml	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,326 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.pool">
+
+  <refentryinfo>
+    <title>kdbus.pool</title>
+    <productname>kdbus.pool</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus.pool</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus.pool</refname>
+    <refpurpose>kdbus pool</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      A pool for data received from the kernel is installed for every
+      <emphasis>connection</emphasis> of the <emphasis>bus</emphasis>, and
+      is sized according to the information stored in the
+      <varname>pool_size</varname> member of <type>struct kdbus_cmd_hello</type>
+      when <constant>KDBUS_CMD_HELLO</constant> is employed. Internally, the
+      pool is segmented into <emphasis>slices</emphasis>, each referenced by its
+      <emphasis>offset</emphasis> in the pool, expressed in <type>bytes</type>.
+      See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more information about <constant>KDBUS_CMD_HELLO</constant>.
+    </para>
+
+    <para>
+      The pool is written to by the kernel when one of the following
+      <emphasis>ioctls</emphasis> is issued:
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_CMD_HELLO</constant></term>
+          <listitem><para>
+            ... to receive details about the bus the connection was made to
+          </para></listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><constant>KDBUS_CMD_RECV</constant></term>
+          <listitem><para>
+            ... to receive a message
+          </para></listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><constant>KDBUS_CMD_LIST</constant></term>
+          <listitem><para>
+            ... to dump the name registry
+          </para></listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><constant>KDBUS_CMD_CONN_INFO</constant></term>
+          <listitem><para>
+            ... to retrieve information on a connection
+          </para></listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><constant>KDBUS_CMD_BUS_CREATOR_INFO</constant></term>
+          <listitem><para>
+            ... to retrieve information about a connection's bus creator
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+
+    </para>
+    <para>
+      The <varname>offset</varname> fields returned by any of the
+      aforementioned ioctls describe offsets inside the pool. In order to make
+      the slice available for subsequent calls,
+      <constant>KDBUS_CMD_FREE</constant> has to be called on that offset
+      (see below). Otherwise, the pool will fill up, and the connection won't
+      be able to receive any more information through its pool.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Pool slice allocation</title>
+    <para>
+      Pool slices are allocated by the kernel in order to report information
+      back to a task, such as messages or returned name lists.
+      Allocation of pool slices cannot be initiated by userspace. See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      and
+      <citerefentry>
+        <refentrytitle>kdbus.name</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for examples of commands that use the <emphasis>pool</emphasis> to
+      return data.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Accessing the pool memory</title>
+    <para>
+      Memory in the pool is read-only for userspace and may only be written
+      to by the kernel. To read from the pool memory, the caller is expected to
+      <citerefentry>
+        <refentrytitle>mmap</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      the buffer into its address space, like this:
+    </para>
+    <programlisting>
+uint8_t *buf = mmap(NULL, size, PROT_READ, MAP_SHARED, conn_fd, 0);
+    </programlisting>
+
+    <para>
+      In order to map the entire pool, the <varname>size</varname> parameter in
+      the example above should be set to the value of the
+      <varname>pool_size</varname> member of
+      <type>struct kdbus_cmd_hello</type> when
+      <constant>KDBUS_CMD_HELLO</constant> was employed to create the
+      connection (see above).
+    </para>
+
+    <para>
+      The <emphasis>file descriptor</emphasis> used to map the memory must be
+      the one that was used to create the <emphasis>connection</emphasis>.
+      In other words, the one that was used to call
+      <constant>KDBUS_CMD_HELLO</constant>. See
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>
+      for more details.
+    </para>
+
+    <para>
+      Alternatively, instead of mapping the entire pool buffer, only parts
+      of it can be mapped. Every kdbus command that returns an
+      <emphasis>offset</emphasis> (see above) also reports a
+      <emphasis>size</emphasis> along with it, so programs can be written
+      to map only those portions of the pool needed to access a specific
+      <emphasis>slice</emphasis>.
+    </para>
+
+    <para>
+      When access to the pool memory is no longer needed, programs should
+      call <function>munmap()</function> on the pointer returned by
+      <function>mmap()</function>.
+    </para>
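+
+    <para>
+      The following is a minimal sketch (not taken from the kdbus sources)
+      that combines these steps: it maps the pool of an existing connection,
+      reads a slice that an ioctl reported at a given
+      <varname>offset</varname> and <varname>size</varname>, and unmaps the
+      pool again. The <varname>conn_fd</varname> and
+      <varname>pool_size</varname> values are assumed to come from the
+      <constant>KDBUS_CMD_HELLO</constant> call described above;
+      <function>consume_data()</function> is a hypothetical application
+      callback.
+    </para>
+    <programlisting><![CDATA[
+#include <stdint.h>
+#include <sys/mman.h>
+
+extern void consume_data(const uint8_t *data, uint64_t size);
+
+int read_slice(int conn_fd, uint64_t pool_size,
+               uint64_t offset, uint64_t size)
+{
+        uint8_t *pool;
+
+        /* The pool is read-only for userspace. */
+        pool = mmap(NULL, pool_size, PROT_READ, MAP_SHARED, conn_fd, 0);
+        if (pool == MAP_FAILED)
+                return -1;
+
+        /* The slice lives 'offset' bytes into the pool. */
+        consume_data(pool + offset, size);
+
+        munmap(pool, pool_size);
+        return 0;
+}
+    ]]></programlisting>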
+  </refsect1>
+
+  <refsect1>
+    <title>Freeing pool slices</title>
+    <para>
+      The <constant>KDBUS_CMD_FREE</constant> ioctl is used to free a slice
+      inside the pool, describing an offset that was returned in an
+      <varname>offset</varname> field of another ioctl struct.
+      The <constant>KDBUS_CMD_FREE</constant> command takes a
+      <type>struct kdbus_cmd_free</type> as argument.
+    </para>
+
+<programlisting>
+struct kdbus_cmd_free {
+  __u64 size;
+  __u64 flags;
+  __u64 return_flags;
+  __u64 offset;
+  struct kdbus_item items[0];
+};
+</programlisting>
+
+    <para>The fields in this struct are described below.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term><varname>size</varname></term>
+        <listitem><para>
+          The overall size of the struct, including its items.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>flags</varname></term>
+        <listitem><para>
+          Currently unused.
+          <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+          valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+          and the <varname>flags</varname> field is set to
+          <constant>0</constant>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>return_flags</varname></term>
+        <listitem><para>
+          Flags returned by the kernel. Currently unused and always set to
+          <constant>0</constant> by the kernel.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>offset</varname></term>
+        <listitem><para>
+          The offset to free, as returned by other ioctls that allocated
+          memory for returned information.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>items</varname></term>
+        <listitem><para>
+          Items to specify further details for the free command.
+          Currently unused.
+          Unrecognized items are rejected, and the ioctl will fail with
+          <varname>errno</varname> set to <constant>EINVAL</constant>.
+          All items except for
+          <constant>KDBUS_ITEM_NEGOTIATE</constant> (see
+            <citerefentry>
+              <refentrytitle>kdbus.item</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>
+          ) will be rejected.
+        </para></listitem>
+      </varlistentry>
+    </variablelist>
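+
+    <para>
+      The following is a minimal, hedged sketch of such a call. It assumes
+      that the kdbus UAPI header is available as
+      <filename>&lt;linux/kdbus.h&gt;</filename> and that
+      <varname>conn_fd</varname> is the connection owner file descriptor;
+      <function>free_slice()</function> is an illustrative helper, not part
+      of any library.
+    </para>
+    <programlisting><![CDATA[
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/kdbus.h>
+
+int free_slice(int conn_fd, __u64 offset)
+{
+        struct kdbus_cmd_free cmd = {
+                .size = sizeof(cmd),   /* no items appended */
+                .offset = offset,      /* as returned in an 'offset' field */
+        };
+
+        return ioctl(conn_fd, KDBUS_CMD_FREE, &cmd);
+}
+    ]]></programlisting>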
+  </refsect1>
+
+  <refsect1>
+    <title>Return value</title>
+    <para>
+      On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+      on error, <errorcode>-1</errorcode> is returned, and
+      <varname>errno</varname> is set to indicate the error.
+      If the issued ioctl is illegal for the file descriptor used,
+      <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+    </para>
+
+    <refsect2>
+      <title>
+        <constant>KDBUS_CMD_FREE</constant> may fail with the following
+        errors
+      </title>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>ENXIO</constant></term>
+          <listitem><para>
+            No pool slice found at given offset.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            Invalid flags provided.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>EINVAL</constant></term>
+          <listitem><para>
+            The offset is valid, but the user is not allowed to free the slice.
+            This happens, for example, if the offset was retrieved with
+            <constant>KDBUS_RECV_PEEK</constant>.
+          </para></listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>mmap</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>
+        </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>munmap</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+    </simplelist>
+  </refsect1>
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/kdbus.xml linux-4.2-pck/Documentation/kdbus/kdbus.xml
--- linux-4.2/Documentation/kdbus/kdbus.xml	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/kdbus.xml	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,1012 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus">
+
+  <refentryinfo>
+    <title>kdbus</title>
+    <productname>kdbus</productname>
+  </refentryinfo>
+
+  <refmeta>
+    <refentrytitle>kdbus</refentrytitle>
+    <manvolnum>7</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>kdbus</refname>
+    <refpurpose>Kernel Message Bus</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Synopsis</title>
+    <para>
+      kdbus is an inter-process communication bus system controlled by the
+      kernel. It provides user-space with an API to create buses and send
+      unicast and multicast messages to one, or many, peers connected to the
+      same bus. It does not enforce any layout on the transmitted data, but
+      only provides the transport layer used for message interchange between
+      peers.
+    </para>
+    <para>
+      This set of man-pages gives a comprehensive overview of the kernel-level
+      API, with all ioctl commands, associated structs and bit masks. However,
+      most people will not use this API level directly, but rather let one of
+      the high-level abstraction libraries help them integrate D-Bus
+      functionality into their applications.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      kdbus provides a pseudo filesystem called <emphasis>kdbusfs</emphasis>,
+      which is usually mounted on <filename>/sys/fs/kdbus</filename>. Bus
+      primitives can be accessed as files and sub-directories underneath this
+      mount-point. Any advanced operations are done via
+      <function>ioctl()</function> on files created by
+      <emphasis>kdbusfs</emphasis>. Multiple mount-points of
+      <emphasis>kdbusfs</emphasis> are independent of each other. This allows
+      namespacing of kdbus by mounting a new instance of
+      <emphasis>kdbusfs</emphasis> in a new mount-namespace. kdbus calls these
+      mount instances domains and each bus belongs to exactly one domain.
+    </para>
+
+    <para>
+      kdbus was designed as a transport layer for D-Bus, but is in no way
+      limited to, nor controlled by, the D-Bus protocol specification. The D-Bus
+      protocol is one possible application layer on top of kdbus.
+    </para>
+
+    <para>
+      For the general D-Bus protocol specification, its payload format, its
+      marshaling, and its communication semantics, please refer to the
+      <ulink url="http://dbus.freedesktop.org/doc/dbus-specification.html">
+      D-Bus specification</ulink>.
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Terminology</title>
+
+    <refsect2>
+      <title>Domain</title>
+      <para>
+        A domain is a <emphasis>kdbusfs</emphasis> mount-point containing all
+        the bus primitives. Each domain is independent, and separate domains
+        do not affect each other.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Bus</title>
+      <para>
+        A bus is a named object inside a domain. Clients exchange messages
+        over a bus. Separate buses have no connection to each other;
+        messages can only be exchanged on the same bus. The default endpoint of
+        a bus, to which clients establish connections, is the "bus" file
+        /sys/fs/kdbus/&lt;bus name&gt;/bus.
+        Common operating system setups create one "system bus" per system,
+        and one "user bus" for every logged-in user. Applications or services
+        may create their own private buses. The kernel driver does not
+        distinguish between different bus types, they are all handled the same
+        way. See
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Endpoint</title>
+      <para>
+        An endpoint provides a file to talk to a bus. Opening an endpoint
+        creates a new connection to the bus to which the endpoint belongs. All
+        endpoints have unique names and are accessible as files underneath the
+        directory of a bus, e.g., /sys/fs/kdbus/&lt;bus&gt;/&lt;endpoint&gt;.
+        Every bus has a default endpoint called "bus".
+        A bus can optionally offer additional endpoints with custom names
+        to provide restricted access to the bus. Custom endpoints carry
+        additional policy which can be used to create sandboxes with
+        locked-down, limited, filtered access to a bus. See
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Connection</title>
+      <para>
+        A connection to a bus is created by opening an endpoint file of a
+        bus. Every ordinary client connection has a unique identifier on the
+        bus and can address messages to every other connection on the same
+        bus by using the peer's connection ID as the destination. See
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Pool</title>
+      <para>
+        Each connection allocates a piece of shmem-backed memory that is
+        used to receive messages and answers to ioctl commands from the kernel.
+        It is never used to send anything to the kernel. In order to access that
+        memory, an application must mmap() it into its address space. See
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Well-known Name</title>
+      <para>
+        A connection can, in addition to its implicit unique connection ID,
+        request the ownership of a textual well-known name. Well-known names are
+        noted in reverse-domain notation, such as com.example.service1. A
+        connection that offers a service on a bus is usually reached by its
+        well-known name. A connection ID and a well-known name are analogous
+        to an IP address and a DNS name associated with that address. See
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Message</title>
+      <para>
+        Connections can exchange messages with other connections by addressing
+        the peers with their connection ID or well-known name. A message
+        consists of a message header with information on how to route the
+        message, and the message payload, which is a logical byte stream of
+        arbitrary size. Messages can carry additional file descriptors to be
+        passed from one connection to another, just like passing file
+        descriptors over UNIX domain sockets. Every connection can specify which
+        set of metadata the kernel should attach to the message when it is
+        delivered to the receiving connection. Metadata contains information
+        like: system time stamps, UID, GID, TID, proc-starttime, well-known
+        names, process comm, process exe, process argv, cgroup, capabilities,
+        seclabel, audit session, loginuid and the connection's human-readable
+        name. See
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Item</title>
+      <para>
+        The API of kdbus implements the notion of items, submitted through and
+        returned by most ioctls, and stored inside data structures in the
+        connection's pool. See
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Broadcast, signal, filter, match</title>
+      <para>
+        Signals are messages that a receiver opts in for by installing a blob of
+        bytes, called a 'match'. Signal messages must always carry a
+        counter-part blob, called a 'filter', and signals are only delivered to
+        peers which have a match that white-lists the message's filter. Senders
+        of signal messages can use either a single connection ID as receiver,
+        or the special connection ID
+        <constant>KDBUS_DST_ID_BROADCAST</constant> to potentially send it to
+        all connections of a bus, following the logic described above. See
+        <citerefentry>
+          <refentrytitle>kdbus.match</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        and
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Policy</title>
+      <para>
+        A policy is a set of rules that define which connections can see, talk
+        to, or register a well-known name on the bus. A policy is attached to
+        buses and custom endpoints, and modified by policy holder connections or
+        owners of custom endpoints. See
+        <citerefentry>
+          <refentrytitle>kdbus.policy</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Privileged bus users</title>
+      <para>
+        A user connecting to the bus is considered privileged if it is either
+        the creator of the bus, or if it has the CAP_IPC_OWNER capability flag
+        set. See
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for more details.
+      </para>
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>Bus Layout</title>
+
+    <para>
+      A <emphasis>bus</emphasis> provides and defines an environment that peers
+      can connect to for message interchange. A bus is created via the kdbus
+      control interface and can be modified by the bus creator. It applies the
+      policy that controls all bus operations. The bus creator itself does not
+      participate as a peer. To establish a peer
+      <emphasis>connection</emphasis>, you have to open one of the
+      <emphasis>endpoints</emphasis> of a bus. Each bus provides a default
+      endpoint, but further endpoints can be created on-demand. Endpoints are
+      used to apply additional policies for all connections on this endpoint.
+      Thus, they provide additional filters to further restrict access of
+      specific connections to the bus.
+    </para>
+
+    <para>
+      The following diagram shows an example bus layout:
+    </para>
+
+    <programlisting><![CDATA[
+                                  Bus Creator
+                                       |
+                                       |
+                                    +-----+
+                                    | Bus |
+                                    +-----+
+                                       |
+                    __________________/ \__________________
+                   /                                       \
+                   |                                       |
+             +----------+                             +----------+
+             | Endpoint |                             | Endpoint |
+             +----------+                             +----------+
+         _________/|\_________                   _________/|\_________
+        /          |          \                 /          |          \
+        |          |          |                 |          |          |
+        |          |          |                 |          |          |
+   Connection  Connection  Connection      Connection  Connection  Connection
+    ]]></programlisting>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Data structures and interconnections</title>
+    <programlisting><![CDATA[
+  +--------------------------------------------------------------------------+
+  | Domain (Mount Point)                                                     |
+  | /sys/fs/kdbus/control                                                    |
+  | +----------------------------------------------------------------------+ |
+  | | Bus (System Bus)                                                     | |
+  | | /sys/fs/kdbus/0-system/                                              | |
+  | | +-------------------------------+ +--------------------------------+ | |
+  | | | Endpoint                      | | Endpoint                       | | |
+  | | | /sys/fs/kdbus/0-system/bus    | | /sys/fs/kdbus/0-system/ep.app  | | |
+  | | +-------------------------------+ +--------------------------------+ | |
+  | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+  | | | Connection   | | Connection   | | Connection   | | Connection    | | |
+  | | | :1.22        | | :1.25        | | :1.55        | | :1.81         | | |
+  | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+  | +----------------------------------------------------------------------+ |
+  |                                                                          |
+  | +----------------------------------------------------------------------+ |
+  | | Bus (User Bus for UID 2702)                                          | |
+  | | /sys/fs/kdbus/2702-user/                                             | |
+  | | +-------------------------------+ +--------------------------------+ | |
+  | | | Endpoint                      | | Endpoint                       | | |
+  | | | /sys/fs/kdbus/2702-user/bus   | | /sys/fs/kdbus/2702-user/ep.app | | |
+  | | +-------------------------------+ +--------------------------------+ | |
+  | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+  | | | Connection   | | Connection   | | Connection   | | Connection    | | |
+  | | | :1.22        | | :1.25        | | :1.55        | | :1.81         | | |
+  | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+  | +----------------------------------------------------------------------+ |
+  +--------------------------------------------------------------------------+
+    ]]></programlisting>
+  </refsect1>
+
+  <refsect1>
+    <title>Metadata</title>
+
+    <refsect2>
+      <title>When metadata is collected</title>
+      <para>
+        kdbus records data about the system in certain situations. Such metadata
+        can refer to the currently active process (creds, PIDs, current user
+        groups, process names and its executable path, cgroup membership,
+        capabilities, security label and audit information), connection
+        information (description string, currently owned names) and time stamps.
+      </para>
+      <para>
+        Metadata is collected at the following times.
+      </para>
+
+      <itemizedlist>
+        <listitem><para>
+          When a bus is created (<constant>KDBUS_CMD_MAKE</constant>),
+          information about the calling task is collected. This data is returned
+          by the kernel via the <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant>
+          call.
+        </para></listitem>
+
+        <listitem>
+          <para>
+            When a connection is created (<constant>KDBUS_CMD_HELLO</constant>),
+            information about the calling task is collected. Alternatively, a
+            privileged connection may provide 'faked' information about
+            credentials, PIDs and security labels which will be stored instead.
+            This data is returned by the kernel as information on a connection
+            (<constant>KDBUS_CMD_CONN_INFO</constant>). Only metadata that a
+            connection has allowed to be sent (by setting its bit in
+            <varname>attach_flags_send</varname>) will be exported in this way.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            When a message is sent (<constant>KDBUS_CMD_SEND</constant>),
+            information about the sending task and the sending connection is
+            collected. This metadata will be attached to the message when it
+            arrives in the receiver's pool. If the connection sending the
+            message installed faked credentials (see
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>),
+            the message will not be augmented by any information about the
+            currently sending task. Note that only metadata that was requested
+            by the receiving connection will be collected and attached to
+            messages.
+          </para>
+        </listitem>
+      </itemizedlist>
+
+      <para>
+        Which metadata items are actually delivered depends on the following
+        sets and masks:
+      </para>
+
+      <itemizedlist>
+        <listitem><para>
+          (a) the system-wide kmod creds mask
+          (module parameter <varname>attach_flags_mask</varname>)
+        </para></listitem>
+
+        <listitem><para>
+          (b) the per-connection send creds mask, set by the connecting client
+        </para></listitem>
+
+        <listitem><para>
+          (c) the per-connection receive creds mask, set by the connecting
+          client
+        </para></listitem>
+
+        <listitem><para>
+          (d) the per-bus minimal creds mask, set by the bus creator
+        </para></listitem>
+
+        <listitem><para>
+          (e) the per-bus owner creds mask, set by the bus creator
+        </para></listitem>
+
+        <listitem><para>
+          (f) the mask specified when querying creds of a bus peer
+        </para></listitem>
+
+        <listitem><para>
+          (g) the mask specified when querying creds of a bus owner
+        </para></listitem>
+      </itemizedlist>
+
+      <para>
+        With the following rules:
+      </para>
+
+      <itemizedlist>
+        <listitem>
+          <para>
+            [1] The creds attached to messages are determined as
+            <constant>a &amp; b &amp; c</constant>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            [2] When connecting to a bus (<constant>KDBUS_CMD_HELLO</constant>),
+            and <constant>~b &amp; d != 0</constant>, the call will fail with
+            <errorcode>-1</errorcode>, and <varname>errno</varname> is set to
+            <constant>ECONNREFUSED</constant>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            [3] When querying creds of a bus peer, the creds returned are
+            <constant>a &amp; b &amp; f</constant>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            [4] When querying creds of a bus owner, the creds returned are
+            <constant>a &amp; e &amp; g</constant>.
+          </para>
+        </listitem>
+      </itemizedlist>
+
+      <para>
+        Hence, programs might not always get all the metadata items they
+        requested, and must be written to cope with this fact.
+      </para>
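+
+      <para>
+        The following sketch illustrates rule [1] only; the function and
+        parameter names are hypothetical and do not appear in the kdbus
+        sources.
+      </para>
+      <programlisting><![CDATA[
+#include <linux/types.h>
+
+static __u64 effective_message_metadata(__u64 module_mask,    /* (a) */
+                                        __u64 sender_send,    /* (b) */
+                                        __u64 receiver_recv)  /* (c) */
+{
+        /* Rule [1]: items attached to a delivered message. */
+        return module_mask & sender_send & receiver_recv;
+}
+      ]]></programlisting>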
+    </refsect2>
+
+    <refsect2>
+      <title>Benefits and heads-up</title>
+      <para>
+        Attaching metadata to messages has two major benefits.
+
+        <itemizedlist>
+          <listitem>
+            <para>
+              Metadata attached to messages is gathered at the moment when the
+              other side calls <constant>KDBUS_CMD_SEND</constant>, or,
+              respectively, when the kernel notification is generated. There is
+              no need for the receiving peer to retrieve information about the
+              task in a second step. This closes a race gap that would otherwise
+              be inherent.
+            </para>
+          </listitem>
+          <listitem>
+            <para>
+              As metadata is delivered along with messages in the same data
+              blob, no extra kernel calls are needed to gather it.
+            </para>
+          </listitem>
+        </itemizedlist>
+
+        Note, however, that collecting metadata does come at a price for
+        performance, so developers should carefully assess which metadata
+        they really need to opt in for. As a best practice, data that is not
+        needed as part of a message should not be requested by the connection
+        in the first place (see <varname>attach_flags_recv</varname> in
+        <constant>KDBUS_CMD_HELLO</constant>).
+      </para>
+    </refsect2>
+
+    <refsect2>
+      <title>Attach flags for metadata items</title>
+      <para>
+        The metadata items to be attached by the kernel are selected through
+        a bitmask passed with the aforementioned commands. The following
+        <emphasis>attach flags</emphasis> are currently supported.
+        Both the <varname>attach_flags_recv</varname> and
+        <varname>attach_flags_send</varname> fields of
+        <type>struct kdbus_cmd_hello</type>, as well as the payload of the
+        <constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant> and
+        <constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant> items follow this
+        scheme.
+      </para>
+
+      <variablelist>
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_TIMESTAMP</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_TIMESTAMP</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_CREDS</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_CREDS</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_PIDS</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_PIDS</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_AUXGROUPS</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_AUXGROUPS</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_NAMES</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_OWNED_NAME</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_TID_COMM</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_TID_COMM</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_PID_COMM</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_PID_COMM</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_EXE</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_EXE</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_CMDLINE</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_CMDLINE</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_CGROUP</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_CGROUP</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_CAPS</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_CAPS</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_SECLABEL</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_SECLABEL</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_AUDIT</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_AUDIT</constant>.
+            </para></listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><constant>KDBUS_ATTACH_CONN_DESCRIPTION</constant></term>
+            <listitem><para>
+              Requests the attachment of an item of type
+              <constant>KDBUS_ITEM_CONN_DESCRIPTION</constant>.
+            </para></listitem>
+        </varlistentry>
+      </variablelist>
+
+      <para>
+        Please refer to
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+        for detailed information about the layout and payload of items and
+        what each metadata item should be used for.
+      </para>
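+
+      <para>
+        As a brief, hedged example (assuming the constants above are provided
+        by the kdbus UAPI header), a connection that is only interested in
+        time stamps, credentials and PID information of senders would build
+        its receive mask like this; the variable name is illustrative only:
+      </para>
+      <programlisting><![CDATA[
+#include <linux/types.h>
+#include <linux/kdbus.h>
+
+/* Mask later stored in attach_flags_recv of struct kdbus_cmd_hello. */
+static const __u64 wanted_recv_metadata = KDBUS_ATTACH_TIMESTAMP |
+                                          KDBUS_ATTACH_CREDS |
+                                          KDBUS_ATTACH_PIDS;
+      ]]></programlisting>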
+    </refsect2>
+  </refsect1>
+
+  <refsect1>
+    <title>The ioctl interface</title>
+
+    <para>
+      As stated in the 'synopsis' section above, application developers are
+      strongly encouraged to use kdbus through one of the high-level D-Bus
+      abstraction libraries, rather than using the low-level API directly.
+    </para>
+
+    <para>
+      kdbus on the kernel level exposes its functions exclusively through
+      <citerefentry>
+        <refentrytitle>ioctl</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>,
+      employed on file descriptors returned by
+      <citerefentry>
+        <refentrytitle>open</refentrytitle>
+        <manvolnum>2</manvolnum>
+      </citerefentry>
+      on pseudo files exposed by
+      <citerefentry>
+        <refentrytitle>kdbus.fs</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+    </para>
+    <para>
+      Following is a list of all the ioctls, along with the command structs
+      they must be used with.
+    </para>
+
+    <informaltable frame="none">
+      <tgroup cols="3" colsep="1">
+        <thead>
+          <row>
+            <entry>ioctl signature</entry>
+            <entry>command</entry>
+            <entry>transported struct</entry>
+          </row>
+        </thead>
+        <tbody>
+          <row>
+            <entry><constant>0x40189500</constant></entry>
+            <entry><constant>KDBUS_CMD_BUS_MAKE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x40189510</constant></entry>
+            <entry><constant>KDBUS_CMD_ENDPOINT_MAKE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0xc0609580</constant></entry>
+            <entry><constant>KDBUS_CMD_HELLO</constant></entry>
+            <entry><type>struct kdbus_cmd_hello *</type></entry>
+          </row><row>
+            <entry><constant>0x40189582</constant></entry>
+            <entry><constant>KDBUS_CMD_BYEBYE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x40389590</constant></entry>
+            <entry><constant>KDBUS_CMD_SEND</constant></entry>
+            <entry><type>struct kdbus_cmd_send *</type></entry>
+          </row><row>
+            <entry><constant>0x80409591</constant></entry>
+            <entry><constant>KDBUS_CMD_RECV</constant></entry>
+            <entry><type>struct kdbus_cmd_recv *</type></entry>
+          </row><row>
+            <entry><constant>0x40209583</constant></entry>
+            <entry><constant>KDBUS_CMD_FREE</constant></entry>
+            <entry><type>struct kdbus_cmd_free *</type></entry>
+          </row><row>
+            <entry><constant>0x401895a0</constant></entry>
+            <entry><constant>KDBUS_CMD_NAME_ACQUIRE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x401895a1</constant></entry>
+            <entry><constant>KDBUS_CMD_NAME_RELEASE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x80289586</constant></entry>
+            <entry><constant>KDBUS_CMD_LIST</constant></entry>
+            <entry><type>struct kdbus_cmd_list *</type></entry>
+          </row><row>
+            <entry><constant>0x80309584</constant></entry>
+            <entry><constant>KDBUS_CMD_CONN_INFO</constant></entry>
+            <entry><type>struct kdbus_cmd_info *</type></entry>
+          </row><row>
+            <entry><constant>0x40209551</constant></entry>
+            <entry><constant>KDBUS_CMD_UPDATE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x80309585</constant></entry>
+            <entry><constant>KDBUS_CMD_BUS_CREATOR_INFO</constant></entry>
+            <entry><type>struct kdbus_cmd_info *</type></entry>
+          </row><row>
+            <entry><constant>0x40189511</constant></entry>
+            <entry><constant>KDBUS_CMD_ENDPOINT_UPDATE</constant></entry>
+            <entry><type>struct kdbus_cmd *</type></entry>
+          </row><row>
+            <entry><constant>0x402095b0</constant></entry>
+            <entry><constant>KDBUS_CMD_MATCH_ADD</constant></entry>
+            <entry><type>struct kdbus_cmd_match *</type></entry>
+          </row><row>
+            <entry><constant>0x402095b1</constant></entry>
+            <entry><constant>KDBUS_CMD_MATCH_REMOVE</constant></entry>
+            <entry><type>struct kdbus_cmd_match *</type></entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </informaltable>
+
+    <para>
+      Depending on the type of <emphasis>kdbusfs</emphasis> node that was
+      opened and what ioctls have been executed on a file descriptor before,
+      a different sub-set of ioctl commands is allowed.
+    </para>
+
+    <itemizedlist>
+      <listitem>
+        <para>
+          On a file descriptor resulting from opening a
+          <emphasis>control node</emphasis>, only the
+          <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl may be executed.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          On a file descriptor resulting from opening a
+          <emphasis>bus endpoint node</emphasis>, only the
+          <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> and
+          <constant>KDBUS_CMD_HELLO</constant> ioctls may be executed.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          A file descriptor that was used to create a bus
+          (via <constant>KDBUS_CMD_BUS_MAKE</constant>) is called a
+          <emphasis>bus owner</emphasis> file descriptor. The bus will be
+          active as long as the file descriptor is kept open.
+          A bus owner file descriptor cannot be used to
+          employ any further ioctls. As soon as
+          <citerefentry>
+            <refentrytitle>close</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>
+          is called on it, the bus will be shut down, along with all associated
+          endpoints and connections. See
+          <citerefentry>
+            <refentrytitle>kdbus.bus</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for more details.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          A file descriptor that was used to create an endpoint
+          (via <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>) is called an
+          <emphasis>endpoint owner</emphasis> file descriptor. The endpoint
+          will be active as long as the file descriptor is kept open.
+          An endpoint owner file descriptor can only be used
+          to update details of an endpoint through the
+          <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> ioctl. As soon as
+          <citerefentry>
+            <refentrytitle>close</refentrytitle>
+            <manvolnum>2</manvolnum>
+          </citerefentry>
+          is called on it, the endpoint will be removed from the bus, and all
+          connections that are connected to the bus through it are shut down.
+          See
+          <citerefentry>
+            <refentrytitle>kdbus.endpoint</refentrytitle>
+            <manvolnum>7</manvolnum>
+          </citerefentry>
+          for more details.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          A file descriptor that was used to create a connection
+          (via <constant>KDBUS_CMD_HELLO</constant>) is called a
+          <emphasis>connection owner</emphasis> file descriptor. The connection
+          will be active as long as the file descriptor is kept open.
+          A connection owner file descriptor may be used to
+          issue any of the following ioctls.
+        </para>
+
+        <itemizedlist>
+          <listitem><para>
+            <constant>KDBUS_CMD_UPDATE</constant> to tweak details of the
+            connection. See
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_BYEBYE</constant> to shut down a connection
+            without losing messages. See
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_FREE</constant> to free a slice of memory in
+            the pool. See
+            <citerefentry>
+              <refentrytitle>kdbus.pool</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_CONN_INFO</constant> to retrieve information
+            on other connections on the bus. See
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant> to retrieve
+            information on the bus creator. See
+            <citerefentry>
+              <refentrytitle>kdbus.connection</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_LIST</constant> to retrieve a list of
+            currently active well-known names and unique IDs on the bus. See
+            <citerefentry>
+              <refentrytitle>kdbus.name</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_SEND</constant> and
+            <constant>KDBUS_CMD_RECV</constant> to send or receive a message.
+            See
+            <citerefentry>
+              <refentrytitle>kdbus.message</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_NAME_ACQUIRE</constant> and
+            <constant>KDBUS_CMD_NAME_RELEASE</constant> to acquire or release
+            a well-known name on the bus. See
+            <citerefentry>
+              <refentrytitle>kdbus.name</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+
+          <listitem><para>
+            <constant>KDBUS_CMD_MATCH_ADD</constant> and
+            <constant>KDBUS_CMD_MATCH_REMOVE</constant> to add or remove
+            a match for signal messages. See
+            <citerefentry>
+              <refentrytitle>kdbus.match</refentrytitle>
+              <manvolnum>7</manvolnum>
+            </citerefentry>.
+          </para></listitem>
+        </itemizedlist>
+      </listitem>
+    </itemizedlist>
+
+    <para>
+      These ioctls, along with the structs they transport, are explained in
+      detail in the other documents linked to in the "See Also" section below.
+    </para>
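+
+    <para>
+      The following is a rough, hedged sketch of the file descriptor life
+      cycle described above. It only shows which descriptor the ioctls apply
+      to; the <constant>KDBUS_CMD_HELLO</constant> payload is omitted here and
+      documented in
+      <citerefentry>
+        <refentrytitle>kdbus.connection</refentrytitle>
+        <manvolnum>7</manvolnum>
+      </citerefentry>.
+      The bus path follows the example layout shown earlier in this document.
+    </para>
+    <programlisting><![CDATA[
+#include <fcntl.h>
+#include <unistd.h>
+
+int connect_to_system_bus(void)
+{
+        int fd;
+
+        /*
+         * This is a bus endpoint node, so only KDBUS_CMD_HELLO and
+         * KDBUS_CMD_ENDPOINT_MAKE may be issued on 'fd' at this point.
+         */
+        fd = open("/sys/fs/kdbus/0-system/bus", O_RDWR | O_CLOEXEC);
+        if (fd < 0)
+                return -1;
+
+        /*
+         * KDBUS_CMD_HELLO would be issued here, turning 'fd' into a
+         * connection owner fd on which SEND, RECV, FREE etc. operate.
+         * Closing 'fd' later terminates the connection.
+         */
+        return fd;
+}
+    ]]></programlisting>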
+  </refsect1>
+
+  <refsect1>
+    <title>See Also</title>
+    <simplelist type="inline">
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.bus</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.connection</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.endpoint</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.fs</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.item</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.message</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.name</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>kdbus.pool</refentrytitle>
+          <manvolnum>7</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>ioctl</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>mmap</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>open</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <citerefentry>
+          <refentrytitle>close</refentrytitle>
+          <manvolnum>2</manvolnum>
+        </citerefentry>
+      </member>
+      <member>
+        <ulink url="http://freedesktop.org/wiki/Software/dbus">D-Bus</ulink>
+      </member>
+    </simplelist>
+  </refsect1>
+
+</refentry>
diff -Nur linux-4.2/Documentation/kdbus/stylesheet.xsl linux-4.2-pck/Documentation/kdbus/stylesheet.xsl
--- linux-4.2/Documentation/kdbus/stylesheet.xsl	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/kdbus/stylesheet.xsl	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<stylesheet xmlns="http://www.w3.org/1999/XSL/Transform" version="1.0">
+	<param name="chunk.quietly">1</param>
+	<param name="funcsynopsis.style">ansi</param>
+	<param name="funcsynopsis.tabular.threshold">80</param>
+	<param name="callout.graphics">0</param>
+	<param name="paper.type">A4</param>
+	<param name="generate.section.toc.level">2</param>
+	<param name="use.id.as.filename">1</param>
+	<param name="citerefentry.link">1</param>
+	<strip-space elements="*"/>
+	<template name="generate.citerefentry.link">
+		<value-of select="refentrytitle"/>
+		<text>.html</text>
+	</template>
+</stylesheet>
diff -Nur linux-4.2/Documentation/kernel-parameters.txt linux-4.2-pck/Documentation/kernel-parameters.txt
--- linux-4.2/Documentation/kernel-parameters.txt	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/Documentation/kernel-parameters.txt	2015-09-08 11:20:31.767046152 -0300
@@ -3896,6 +3896,9 @@
 					HIGHMEM regardless of setting
 					of CONFIG_HIGHPTE.
 
+	uuid_debug=	(Boolean) whether to enable debugging of TuxOnIce's
+			uuid support.
+
 	vdso=		[X86,SH]
 			On X86_32, this is an alias for vdso32=.  Otherwise:
 
diff -Nur linux-4.2/Documentation/power/tuxonice-internals.txt linux-4.2-pck/Documentation/power/tuxonice-internals.txt
--- linux-4.2/Documentation/power/tuxonice-internals.txt	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/power/tuxonice-internals.txt	2015-09-08 11:20:31.793711515 -0300
@@ -0,0 +1,532 @@
+		   TuxOnIce 4.0 Internal Documentation.
+			Updated to 23 March 2015
+
+(Please note that incremental image support mentioned in this document is work
+in progress. This document may need updating prior to the actual release of
+4.0!)
+
+1.  Introduction.
+
+    TuxOnIce 4.0 is an addition to the Linux Kernel, designed to
+    allow the user to quickly shutdown and quickly boot a computer, without
+    needing to close documents or programs. It is equivalent to the
+    hibernate facility in some laptops. This implementation, however,
+    requires no special BIOS or hardware support.
+
+    The code in these files is based upon the original implementation
+    prepared by Gabor Kuti and additional work by Pavel Machek and a
+    host of others. This code has been substantially reworked by Nigel
+    Cunningham, again with the help and testing of many others, not the
+    least of whom are Bernard Blackham and Michael Frank. At its heart,
+    however, the operation is essentially the same as Gabor's version.
+
+2.  Overview of operation.
+
+    The basic sequence of operations is as follows:
+
+	a. Quiesce all other activity.
+	b. Ensure enough memory and storage space are available, and attempt
+	   to free memory/storage if necessary.
+	c. Allocate the required memory and storage space.
+	d. Write the image.
+	e. Power down.
+
+    There are a number of complicating factors which mean that things are
+    not as simple as the above would imply, however...
+
+    o The activity of each process must be stopped at a point where it will
+    not be holding locks necessary for saving the image, or unexpectedly
+    restart operations due to something like a timeout and thereby make
+    our image inconsistent.
+
+    o It is desirable that we sync outstanding I/O to disk before calculating
+    image statistics. This reduces corruption if one should suspend but
+    then not resume, and also makes later parts of the operation safer (see
+    below).
+
+    o We need to get as close as we can to an atomic copy of the data.
+    Inconsistencies in the image will result in inconsistent memory contents at
+    resume time, and thus in instability of the system and/or file system
+    corruption. This would appear to imply a maximum image size of one half of
+    the amount of RAM, but we have a solution... (again, below).
+
+    o In 2.6 and later, we choose to play nicely with the other suspend-to-disk
+    implementations.
+
+3.  Detailed description of internals.
+
+    a. Quiescing activity.
+
+    Safely quiescing the system is achieved using three separate but related
+    aspects.
+
+    First, we use the vanilla kernel's support for freezing processes. This code
+    is based on the observation that the vast majority of processes don't need
+    to run during suspend. They can be 'frozen'. The kernel therefore
+    implements a refrigerator routine, which processes enter and in which they
+    remain until the cycle is complete. Processes enter the refrigerator via
+    try_to_freeze() invocations at appropriate places (a sketch of this
+    pattern appears at the end of this subsection). A process cannot be
+    frozen at an arbitrary point; it must not be holding locks that will be
+    needed for writing the image or freezing other processes. For this reason,
+    userspace processes generally enter the refrigerator via the signal
+    handling code, and kernel threads at the place in their event loops where
+    they drop locks and yield to other processes or sleep. The task of freezing
+    processes is complicated by the fact that there can be interdependencies
+    between processes. Freezing process A before process B may mean that
+    process B cannot be frozen, because it blocks waiting for process A
+    rather than stopping in the refrigerator. This issue is seen where
+    userspace waits on freezeable kernel threads or fuse filesystem threads.
+    To address this
+    issue, we implement the following algorithm for quiescing activity:
+
+	- Freeze filesystems (including fuse - userspace programs starting
+		new requests are immediately frozen; programs already running
+		requests complete their work before being frozen in the next
+		step)
+	- Freeze userspace
+	- Thaw filesystems (this is safe now that userspace is frozen and no
+		fuse requests are outstanding).
+	- Invoke sys_sync (noop on fuse).
+	- Freeze filesystems
+	- Freeze kernel threads
+
+    If we need to free memory, we thaw kernel threads and filesystems, but not
+    userspace. We can then free caches without worrying about deadlocks due to
+    swap files being on frozen filesystems or such like.
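+
+    As a rough illustration (not taken from any particular driver), a
+    freezable kernel thread's main loop typically looks like the following,
+    with try_to_freeze() called at a point where no problematic locks are
+    held:
+
+	while (!kthread_should_stop()) {
+		try_to_freeze();
+
+		/* Do work, drop any locks, then sleep. */
+		schedule_timeout_interruptible(HZ);
+	}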
+
+    b. Ensure enough memory & storage are available.
+
+    We have a number of constraints to meet in order to be able to successfully
+    suspend and resume.
+
+    First, the image will be written in two parts, described below. One of
+    these parts needs to have an atomic copy made, which of course implies a
+    maximum size of one half of the amount of system memory. The other part
+    ('pageset') is not atomically copied, and can therefore be as large or
+    small as desired.
+
+    Second, we have constraints on the amount of storage available. In these
+    calculations, we may also consider any compression that will be done. The
+    cryptoapi module allows the user to configure an expected compression ratio.
+
+    Third, the user can specify an arbitrary limit on the image size, in
+    megabytes. This limit is treated as a soft limit, so that we don't fail the
+    attempt to suspend if we cannot meet this constraint.
+
+    c. Allocate the required memory and storage space.
+
+    Having done the initial freeze, we determine whether the above constraints
+    are met, and seek to allocate the metadata for the image. If the constraints
+    are not met, or we fail to allocate the required space for the metadata, we
+    seek to free the amount of memory that we calculate is needed and try again.
+    We allow up to four iterations of this loop before aborting the cycle. If
+    we do fail, it should only be because of a bug in TuxOnIce's calculations
+    or the vanilla kernel code for freeing memory.
+
+    These steps are merged together in the prepare_image function, found in
+    prepare_image.c. The functions are merged because of the cyclical nature
+    of the problem of calculating how much memory and storage is needed. Since
+    the data structures containing the information about the image must
+    themselves take memory and use storage, the amount of memory and storage
+    required changes as we prepare the image. Since the changes are not large,
+    only one or two iterations will be required to achieve a solution.
+
+    The recursive nature of the algorithm is minimised by keeping user space
+    frozen while preparing the image, and by the fact that our records of which
+    pages are to be saved and which pageset they are saved in use bitmaps (so
+    that changes in number or fragmentation of the pages to be saved don't
+    feed back via changes in the amount of memory needed for metadata). The
+    recursiveness is thus limited to any extra slab pages allocated to store the
+    extents that record storage used, and the effects of seeking to free memory.
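+
+    As a rough, self-contained sketch of this retry loop (an illustrative
+    model only, not the real prepare_image(); constraints_met(),
+    allocate_metadata() and free_some_memory() are hypothetical stand-ins
+    for the calculations and freeing described above):
+
+    #include <stdbool.h>
+
+    /* Would check image size, storage and the user's soft limit. */
+    static bool constraints_met(void)   { return true; }
+    /* Would allocate bitmaps, extent chains and header storage. */
+    static bool allocate_metadata(void) { return true; }
+    /* Would thaw kernel threads/filesystems and free caches. */
+    static void free_some_memory(void)  { }
+
+    static int prepare_image_model(void)
+    {
+      int tries;
+
+      for (tries = 0; tries < 4; tries++) {
+        if (constraints_met() && allocate_metadata())
+          return 0;             /* ready to write the image */
+        free_some_memory();     /* then recalculate and retry */
+      }
+      return -1;                /* abort the hibernation cycle */
+    }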
+
+    d. Write the image.
+
+    We previously mentioned the need to create an atomic copy of the data, and
+    the half-of-memory limitation that is implied in this. This limitation is
+    circumvented by dividing the memory to be saved into two parts, called
+    pagesets.
+
+    Pageset2 contains most of the page cache - the pages on the active and
+    inactive LRU lists that aren't needed or modified while TuxOnIce is
+    running, so they can be safely written without an atomic copy. They are
+    therefore saved first and reloaded last. While saving these pages,
+    TuxOnIce carefully ensures that the work of writing the pages doesn't make
+    the image inconsistent. With the support for Kernel (Video) Mode Setting
+    going into the kernel at the time of writing, we need to check for pages
+    on the LRU that are used by KMS, and exclude them from pageset2. They are
+    atomically copied as part of pageset 1.
+
+    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
+    memory. As part of the preparation, we power down drivers, thereby providing
+    them with the opportunity to have their state recorded in the image. The
+    amount of memory allocated by drivers for this is usually negligible, but if
+    DRI is in use, video drivers may require significant amounts. Ideally we
+    would be able to query drivers while preparing the image as to the amount of
+    memory they will need. Unfortunately no such mechanism exists at the time of
+    writing. For this reason, TuxOnIce allows the user to set an
+    'extra_pages_allowance', which is used to ensure that sufficient memory
+    is available for drivers at this point. TuxOnIce also lets the user set this
+    value to 0. In this case, a test driver suspend is done while preparing the
+    image, and the difference (plus a margin) used instead. TuxOnIce will also
+    automatically restart the hibernation process (twice at most) if it finds
+    that the extra pages allowance is not sufficient. It will then use what was
+    actually needed (plus a margin, again). Failure to hibernate should thus
+    be an extremely rare occurrence.
+
+    Having suspended the drivers, we save the CPU context before making an
+    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
+    After saving the two pagesets, we just need to save our metadata before
+    powering down.
+
+    As we mentioned earlier, the contents of pageset2 pages aren't needed once
+    they've been saved. We therefore use them as the destination of our atomic
+    copy. In the unlikely event that pageset1 is larger, extra pages are
+    allocated while the image is being prepared. This is normally only a real
+    possibility when the system has just been booted and the page cache is
+    small.
+
+    This is where we need to be careful about syncing, however. Pageset2 will
+    probably contain filesystem meta data. If this is overwritten with pageset1
+    and then a sync occurs, the filesystem will be corrupted - at least until
+    resume time and another sync of the restored data. Since there is a
+    possibility that the user might not resume or (may it never be!) that
+    TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
+    copying pageset1.
+
+    e. Incremental images
+
+    TuxOnIce 4.0 introduces a new incremental image mode which changes things a
+    little. When incremental images are enabled, we save a 'normal' image the
+    first time we hibernate. On resume, however, we do not free the image or
+    the associated storage. Instead, it is retained until the next attempt at
+    hibernating, and a mechanism is enabled which is used to track which pages
+    of memory are modified between the two cycles. The modified pages can then
+    be added to the existing image, rather than unmodified pages being saved
+    again unnecessarily.
+
+    Incremental image support is available in 64 bit Linux only, due to the
+    requirement for extra page flags.
+
+    This support is accomplished in the following way:
+
+    1) Tracking of pages.
+
+    The tracking of changed pages is accomplished using the page fault
+    mechanism. When we reach a point at which we want to start tracking
+    changes, most pages are marked read-only and also flagged as being
+    read-only because of this support. Since this cannot happen for every page
+    of RAM, some are marked as untracked and always treated as modified when
+    preparing an incremental image. When a process attempts to modify a page
+    that is marked read-only in this way, a page fault occurs, with TuxOnIce
+    code marking the page writable and dirty before allowing the write to
+    continue. In this way, the effect of incremental images on performance is
+    minimised - a page only causes a fault once. Small modifications to the
+    page allocator further reduce the number of faults that occur - free pages
+    are not tracked; they are made writable and marked as dirty as part of
+    being allocated.
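+
+    The same write-protect-and-fault idea can be modelled in userspace with
+    mprotect() and a SIGSEGV handler. The sketch below is only an analogue of
+    the in-kernel mechanism, not TuxOnIce code (and calling mprotect() from a
+    signal handler, while common practice on Linux, is not strictly
+    async-signal-safe):
+
+    #include <signal.h>
+    #include <stdio.h>
+    #include <sys/mman.h>
+    #include <unistd.h>
+
+    static char *region;
+    static long page_size;
+    static int page_dirty;      /* the kernel uses per-page flags/bitmaps */
+
+    static void fault_handler(int sig, siginfo_t *info, void *ctx)
+    {
+      char *page = (char *)((unsigned long)info->si_addr & ~(page_size - 1));
+
+      (void)sig; (void)ctx;
+      page_dirty = 1;                                  /* record the change */
+      mprotect(page, page_size, PROT_READ | PROT_WRITE);  /* allow write */
+    }
+
+    int main(void)
+    {
+      struct sigaction sa = { 0 };
+
+      sa.sa_sigaction = fault_handler;
+      sa.sa_flags = SA_SIGINFO;
+      page_size = sysconf(_SC_PAGESIZE);
+      sigaction(SIGSEGV, &sa, NULL);
+
+      region = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+      mprotect(region, page_size, PROT_READ);  /* start tracking changes */
+      region[0] = 42;            /* faults once, is marked dirty, proceeds */
+      printf("page dirty: %d\n", page_dirty);
+      return 0;
+    }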
+
+    2) Saving the incremental image / atomicity.
+
+    The page fault mechanism is also used to improve the means by which
+    atomicity of the image is achieved. When it is time to do an atomic copy,
+    the flags for pages are reset, with the result being that it is no longer
+    necessary for us to do an atomic copy of pageset1. Instead, we normally write
+    the uncopied pages to disk. When an attempt is made to modify a page that
+    has not yet been saved, the page-fault mechanism makes a copy of the page
+    prior to allowing the write. This copy is then written to disk. Likewise,
+    on resume, if a process attempts to write to a page that has been read
+    while the rest of the image is still being loaded, a copy of that page is
+    made prior to the write being allowed. At the end of loading the image,
+    modified pages can thus be restored to their 'atomic copy' contents prior
+    to restarting normal operation. We also mark pages that are yet to be read
+    as invalid PFNs, so that we can capture as a bug any attempt by a
+    half-restored kernel to access a page that hasn't yet been reloaded.
+
+    f. Power down.
+
+    Powering down uses standard kernel routines. TuxOnIce supports powering down
+    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
+    Supporting suspend to ram (S3) as a power off option might sound strange,
+    but it allows the user to quickly get their system up and running again if
+    the battery doesn't run out (we just need to re-read the overwritten pages)
+    and if the battery does run out (or the user removes power), they can still
+    resume.
+
+4.  Data Structures.
+
+    TuxOnIce uses three main structures to store its metadata and configuration
+    information:
+
+    a) Pageflags bitmaps.
+
+    TuxOnIce records which pages will be in pageset1, pageset2, the destination
+    of the atomic copy and the source of the atomically restored image using
+    bitmaps. The code used is that written for swsusp, with small improvements
+    to match TuxOnIce's requirements.
+
+    The pageset1 bitmap is thus easily stored in the image header for use at
+    resume time.
+
+    As mentioned above, using bitmaps also means that the amount of memory and
+    storage required for recording the above information is constant. This
+    greatly simplifies the work of preparing the image. In earlier versions of
+    TuxOnIce, extents were used to record which pages would be stored. In that
+    case, however, eating memory could result in greater fragmentation of the
+    lists of pages, which in turn required more memory to store the extents and
+    more storage in the image header. These could in turn require further
+    freeing of memory, and another iteration. All of this complexity is removed
+    by having bitmaps.
+
+    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
+    through the lists. There is therefore no cost to not being able to find the
+    nth page in order 0 time. We only need to worry about the cost of finding
+    the n+1th page, given the location of the nth page. Bitwise optimisations
+    help here.
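+
+    For example, a sketch of finding the next saved page (using a plain
+    unsigned-long bitmap rather than TuxOnIce's real pageflags structures;
+    the real code also skips whole zero words):
+
+    #include <limits.h>
+
+    #define BITS_PER_ULONG (sizeof(unsigned long) * CHAR_BIT)
+
+    /* Return the next set bit (saved PFN) after 'pfn', or -1 if none. */
+    static long next_saved_pfn(const unsigned long *bitmap, long max_pfn,
+                               long pfn)
+    {
+      for (pfn++; pfn < max_pfn; pfn++)
+        if (bitmap[pfn / BITS_PER_ULONG] &
+            (1UL << (pfn % BITS_PER_ULONG)))
+          return pfn;
+      return -1;
+    }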
+
+    b) Extents for block data.
+
+    TuxOnIce supports writing the image to multiple block devices. In the case
+    of swap, multiple partitions and/or files may be in use, and we happily use
+    them all (with the exception of compcache pages, which we allocate but do
+    not use). This use of multiple block devices is accomplished as follows:
+
+    Whatever the actual source of the allocated storage, the destination of the
+    image can be viewed in terms of one or more block devices, and on each
+    device, a list of sectors. To simplify matters, we only use contiguous,
+    PAGE_SIZE aligned sectors, like the swap code does.
+
+    Since sector numbers on each bdev may well not start at 0, it makes much
+    more sense to use extents here. Contiguous ranges of pages can thus be
+    represented in the extents by contiguous values.
+
+    Variations in block size are taken account of in transforming this data
+    into the parameters for bio submission.
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this lower layer.
+    The lower layer remembers where in the sequence of devices and blocks each
+    pageset starts. The header always starts at the beginning of the allocated
+    storage.
+
+    So extents are:
+
+    struct extent {
+      unsigned long minimum, maximum;
+      struct extent *next;
+    }
+
+    These are combined into chains of extents for a device:
+
+    struct extent_chain {
+      int size; /* size of the extent ie sum (max-min+1) */
+      int allocs, frees;
+      char *name;
+      struct extent *first, *last_touched;
+    };
+
+    For each bdev, we need to store a little more info (simplified definition):
+
+    struct toi_bdev_info {
+       struct block_device *bdev;
+
+       char uuid[17];
+       dev_t dev_t;
+       int bmap_shift;
+       int blocks_per_page;
+    };
+
+    The uuid is the main means used to identify the device in the storage
+    image. This means we can cope with the dev_t representation of a device
+    changing between saving the image and restoring it, as may happen on some
+    bioses or in the LVM case.
+
+    bmap_shift and blocks_per_page apply the effects of variations in blocks
+    per page settings for the filesystem and underlying bdev. For most
+    filesystems, these are the same, but for xfs, they can have independent
+    values.
+
+    Combining these two structures together, we have everything we need to
+    record what devices and what blocks on each device are being used to
+    store the image, and to submit I/O using submit_bio.
+
+    The last elements in the picture are a means of recording how the storage
+    is being used.
+
+    We do this first and foremost by implementing a layer of abstraction on
+    top of the devices and extent chains which allows us to view however many
+    devices there might be as one long storage tape, with a single 'head' that
+    tracks a 'current position' on the tape:
+
+    struct extent_iterate_state {
+      struct extent_chain *chains;
+      int num_chains;
+      int current_chain;
+      struct extent *current_extent;
+      unsigned long current_offset;
+    };
+
+    That is, *chains points to an array of size num_chains of extent chains.
+    For the filewriter, this is always a single chain. For the swapwriter, the
+    array is of size MAX_SWAPFILES.
+
+    current_chain, current_extent and current_offset thus point to the current
+    index in the chains array (and into a matching array of struct
+    suspend_bdev_info), the current extent in that chain (to optimise access),
+    and the current value in the offset.
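+
+    A simplified sketch of how such a 'tape head' might advance, using the
+    structure definitions above (not the actual TuxOnIce routines; error
+    handling and the initial positioning are omitted):
+
+    /* Return the next block value on the 'tape', or -1 at the end. */
+    static long next_on_tape(struct extent_iterate_state *state)
+    {
+      while (state->current_chain < state->num_chains) {
+        struct extent *ext = state->current_extent;
+
+        if (ext && state->current_offset <= ext->maximum)
+          return state->current_offset++;
+
+        if (ext && ext->next) {              /* next extent in this chain */
+          state->current_extent = ext->next;
+          state->current_offset = ext->next->minimum;
+          continue;
+        }
+
+        state->current_chain++;              /* move to the next device */
+        if (state->current_chain < state->num_chains) {
+          state->current_extent =
+            state->chains[state->current_chain].first;
+          if (state->current_extent)
+            state->current_offset = state->current_extent->minimum;
+        }
+      }
+      return -1;
+    }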
+
+    The image is divided into three parts:
+    - The header
+    - Pageset 1
+    - Pageset 2
+
+    The header always starts at the first device and first block. We know its
+    size before we begin to save the image because we carefully account for
+    everything that will be stored in it.
+
+    The second pageset (LRU) is stored first. It begins on the next page after
+    the end of the header.
+
+    The first pageset is stored second. Its start location is only known once
+    pageset2 has been saved, since pageset2 may be compressed as it is written.
+    This location is thus recorded at the end of saving pageset2. It is also
+    page aligned.
+
+    Since this information is needed at resume time, and the location of extents
+    in memory will differ at resume time, this needs to be stored in a portable
+    way:
+
+    struct extent_iterate_saved_state {
+        int chain_num;
+        int extent_num;
+        unsigned long offset;
+    };
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this layer, and
+    invokes the routines to remember and restore the position, without having
+    to worry about the details of how the data is arranged on disk or such like.
+
+    c) Modules
+
+    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
+    for the implementation of different methods of transforming a page to be
+    written to disk and different methods of getting the pages stored.
+
+    In early versions (the betas and perhaps Suspend1), compression support was
+    inlined in the image writing code, and the data structures and code for
+    managing swap were intertwined with the rest of the code. A number of people
+    had expressed interest in implementing image encryption, and alternative
+    methods of storing the image.
+
+    In order to achieve this, TuxOnIce was given a modular design.
+
+    A module is a single file which encapsulates the functionality needed
+    to transform a pageset of data (encryption or compression, for example),
+    or to write the pageset to a device. The former type of module is called
+    a 'page-transformer', the latter a 'writer'.
+
+    Modules are linked together in pipeline fashion. There may be zero or more
+    page transformers in a pipeline, and there is always exactly one writer.
+    The pipeline follows this pattern:
+
+		---------------------------------
+		|          TuxOnIce Core        |
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 1	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 2	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|            Writer		|
+		---------------------------------
+
+    During the writing of an image, the core code feeds pages one at a time
+    to the first module. This module performs whatever transformations it
+    implements on the incoming data, completely consuming the incoming data and
+    feeding output in a similar manner to the next module.
+
+    All routines are SMP safe, and the final result of the transformations is
+    written with an index (provided by the core) and size of the output by the
+    writer. As a result, we can have multithreaded I/O without needing to
+    worry about the sequence in which pages are written (or read).
+
+    During reading, the pipeline works in the reverse direction. The core code
+    calls the first module with the address of a buffer which should be filled.
+    (Note that the buffer size is always PAGE_SIZE at this time). This module
+    will in turn request data from the next module and so on down until the
+    writer is made to read from the stored image.
+
+    Part of the definition of the structure of a module thus looks like this:
+
+        int (*rw_init) (int rw, int stream_number);
+        int (*rw_cleanup) (int rw);
+        int (*write_chunk) (struct page *buffer_page);
+        int (*read_chunk) (struct page *buffer_page, int sync);
+
+    It should be noted that the _cleanup routine may be called before the
+    full stream of data has been read or written. While writing the image,
+    the user may (depending upon settings) choose to abort suspending, and
+    if we are in the midst of writing the last portion of the image, a portion
+    of the second pageset may be reread. This may also happen if an error
+    occurs and we seek to abort the process of writing the image.
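+
+    As a rough userspace model of these hooks (illustrative only; the real
+    hooks take a struct page and are part of a larger module-operations
+    structure registered with the core, and 'next_write_chunk' here is a
+    hypothetical stand-in for the next module in the pipeline), a do-nothing
+    pass-through transformer might look like:
+
+    #include <stddef.h>
+
+    #define MODEL_PAGE_SIZE 4096
+
+    /* Stand-in for the next module's write routine. */
+    static int (*next_write_chunk)(const void *buf, size_t len);
+
+    static int passthrough_rw_init(int rw, int stream_number)
+    {
+      (void)rw; (void)stream_number;
+      return 0;                  /* nothing to set up */
+    }
+
+    static int passthrough_write_chunk(const void *buffer_page)
+    {
+      /* A real transformer would compress or encrypt here before passing
+       * the result on to the next module in the pipeline. */
+      return next_write_chunk(buffer_page, MODEL_PAGE_SIZE);
+    }
+
+    static int passthrough_rw_cleanup(int rw)
+    {
+      (void)rw;
+      return 0;                  /* may run before the stream completes */
+    }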
+
+    The modular design is also useful in a number of other ways. It provides
+    a means whereby we can add support for:
+
+    - providing overall initialisation and cleanup routines;
+    - serialising configuration information in the image header;
+    - providing debugging information to the user;
+    - determining memory and image storage requirements;
+    - dis/enabling components at run-time;
+    - configuring the module (see below);
+
+    ...and routines for writers specific to their work:
+    - Parsing a resume= location;
+    - Determining whether an image exists;
+    - Marking a resume as having been attempted;
+    - Invalidating an image;
+
+    Since some parts of the core - the user interface and storage manager
+    support - have use for some of these functions, they are registered as
+    'miscellaneous' modules as well.
+
+    d) Sysfs data structures.
+
+    This brings us naturally to support for configuring TuxOnIce. We desired to
+    provide a way to make TuxOnIce as flexible and configurable as possible.
+    The user shouldn't have to reboot just because they want to now hibernate to
+    a file instead of a partition, for example.
+
+    To accomplish this, TuxOnIce implements a very generic means whereby the
+    core and modules can register new sysfs entries. All TuxOnIce entries use
+    a single _store and _show routine, both of which are found in
+    tuxonice_sysfs.c in the kernel/power directory. These routines handle the
+    most common operations - getting and setting the values of bits, integers,
+    longs, unsigned longs and strings in one place, and allow overrides for
+    customised get and set options as well as side-effect routines for all
+    reads and writes.
+
+    When combined with some simple macros, a new sysfs entry can then be defined
+    in just a couple of lines:
+
+        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+                        2048, 0, NULL),
+
+    This defines a sysfs entry named "progress_granularity" which is rw and
+    allows the user to access an integer stored at &progress_granularity, giving
+    it a value between 1 and 2048 inclusive.
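+
+    A hypothetical model of what such an entry descriptor and its clamping
+    store routine might look like (illustrative only; the real structures
+    and macros live in tuxonice_sysfs.c and differ in detail):
+
+    /* Hypothetical descriptor for an integer entry with range checking. */
+    struct int_sysfs_entry {
+      const char *name;       /* e.g. "progress_granularity" */
+      int *value;             /* the variable the entry exposes */
+      int minimum, maximum;   /* accepted range, inclusive */
+    };
+
+    static int int_entry_store(struct int_sysfs_entry *entry, long new_value)
+    {
+      if (new_value < entry->minimum || new_value > entry->maximum)
+        return -1;            /* out of range: reject the write */
+      *entry->value = (int)new_value;
+      return 0;
+    }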
+
+    Sysfs entries are registered under /sys/power/tuxonice, and entries for
+    modules are located in a subdirectory named after the module.
+
diff -Nur linux-4.2/Documentation/power/tuxonice.txt linux-4.2-pck/Documentation/power/tuxonice.txt
--- linux-4.2/Documentation/power/tuxonice.txt	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/power/tuxonice.txt	2015-09-08 11:20:31.793711515 -0300
@@ -0,0 +1,948 @@
+	--- TuxOnIce, version 3.0 ---
+
+1.  What is it?
+2.  Why would you want it?
+3.  What do you need to use it?
+4.  Why not just use the version already in the kernel?
+5.  How do you use it?
+6.  What do all those entries in /sys/power/tuxonice do?
+7.  How do you get support?
+8.  I think I've found a bug. What should I do?
+9.  When will XXX be supported?
+10. How does it work?
+11. Who wrote TuxOnIce?
+
+1. What is it?
+
+   Imagine you're sitting at your computer, working away. For some reason, you
+   need to turn off your computer for a while - perhaps it's time to go home
+   for the day. When you come back to your computer next, you're going to want
+   to carry on where you left off. Now imagine that you could push a button and
+   have your computer store the contents of its memory to disk and power down.
+   Then, when you next start up your computer, it loads that image back into
+   memory and you can carry on from where you were, just as if you'd never
+   turned the computer off. You have far less time to start up, no reopening of
+   applications or finding what directory you put that file in yesterday.
+   That's what TuxOnIce does.
+
+   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
+   with some help from Pavel Machek, got an early version going in 1999. The
+   project was then taken over by Florent Chabaud while still in alpha version
+   numbers. Nigel Cunningham came on the scene when Florent was unable to
+   continue, moving the project into betas, then 1.0, 2.0 and so on up to
+   the present series. During the 2.0 series, the name was contracted to
+   Suspend2 and the website suspend2.net created. Beginning around July 2007,
+   a transition to calling the software TuxOnIce was made, to seek to help
+   make it clear that TuxOnIce is more concerned with hibernation than suspend
+   to ram.
+
+   Pavel Machek's swsusp code, which was merged around 2.5.17, retains the
+   original name, and was essentially a fork of the beta code until Rafael
+   Wysocki came on the scene in 2005 and began to improve it further.
+
+2. Why would you want it?
+
+   Why wouldn't you want it?
+
+   Being able to save the state of your system and quickly restore it improves
+   your productivity - you get a useful system in far less time than through
+   the normal boot process. You also get to be completely 'green', using zero
+   power, or as close to that as possible (the computer may still provide
+   minimal power to some devices, so they can initiate a power on, but that
+   will be the same amount of power as would be used if you told the computer
+   to shut down).
+
+3. What do you need to use it?
+
+   a. Kernel Support.
+
+   i) The TuxOnIce patch.
+
+   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
+   2.6 tree at the moment, so you will need to download the kernel source and
+   apply the latest patch. Having done that, enable the appropriate options in
+   make [menu|x]config (under Power Management Options - look for "Enhanced
+   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
+   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
+
+   TuxOnIce patches are available from http://tuxonice.net.
+
+   ii) Compression support.
+
+   Compression support is implemented via the cryptoapi. You will therefore want
+   to select any Cryptoapi transforms that you want to use on your image from
+   the Cryptoapi menu while configuring your kernel. We recommend the use of the
+   LZO compression method - it is very fast and still achieves good compression.
+
+   You can also tell TuxOnIce to write its image to an encrypted and/or
+   compressed filesystem/swap partition. In that case, you don't need to do
+   anything special for TuxOnIce when it comes to kernel configuration.
+
+   iii) Configuring other options.
+
+   While you're configuring your kernel, try to configure as much as possible
+   to build as modules. We recommend this because there are a number of drivers
+   that are still in the process of implementing proper power management
+   support. In those cases, the best way to work around their current lack is
+   to build them as modules and remove the modules while hibernating. You might
+   also bug the driver authors to get their support up to speed, or even help!
+
+   b. Storage.
+
+   i) Swap.
+
+   TuxOnIce can store the hibernation image in your swap partition, a swap file or
+   a combination thereof. Whichever combination you choose, you will probably
+   want to create enough swap space to store the largest image you could have,
+   plus the space you'd normally use for swap. A good rule of thumb would be
+   to calculate the amount of swap you'd want without using TuxOnIce, and then
+   add the amount of memory you have. This swapspace can be arranged in any way
+   you'd like. It can be in one partition or file, or spread over a number. The
+   only requirement is that they be active when you start a hibernation cycle.
+
+   There is one exception to this requirement. TuxOnIce has the ability to turn
+   on one swap file or partition at the start of hibernating and turn it back off
+   at the end. If you want to ensure you have enough memory to store an image
+   when your memory is fully used, you might want to make one swap partition or
+   file for 'normal' use, and another for TuxOnIce to activate & deactivate
+   automatically. (Further details below).
+
+   ii) Normal files.
+
+   TuxOnIce includes a 'file allocator'. The file allocator can store your
+   image in a simple file. Since Linux has the concept of everything being a
+   file, this is more powerful than it initially sounds. If, for example, you
+   were to set up a network block device file, you could hibernate to a network
+   server. This has been tested and works to a point, but nbd itself isn't
+   stateless enough for our purposes.
+
+   Take extra care when setting up the file allocator. If you just type
+   commands without thinking and then try to hibernate, you could cause
+   irreversible corruption on your filesystems! Make sure you have backups.
+
+   Most people will only want to hibernate to a local file. To achieve that, do
+   something along the lines of:
+
+   echo "TuxOnIce" > /hibernation-file
+   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
+
+   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
+   it:
+
+   echo /hibernation-file > /sys/power/tuxonice/file/target
+
+   Then
+
+   cat /sys/power/tuxonice/resume
+
+   Put the results of this into your bootloader's configuration (see also step
+   C, below):
+
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+   # cat /sys/power/tuxonice/resume
+   file:/dev/hda2:0x1e001
+
+   In this example, we would edit the append= line of our lilo.conf|menu.lst
+   so that it included:
+
+   resume=file:/dev/hda2:0x1e001
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+
+   For those who are thinking 'Could I make the file sparse?', the answer is
+   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
+   a sparse file while hibernating. In the longer term (post merge!), I'd like
+   to change things so that the file could be dynamically resized and have
+   holes filled as needed. Right now, however, that's not possible and not a
+   priority.
+
+   c. Bootloader configuration.
+
+   Using TuxOnIce also requires that you add an extra parameter to
+   your lilo.conf or equivalent. Here's an example for a swap partition:
+
+   append="resume=swap:/dev/hda1"
+
+   This would tell TuxOnIce that /dev/hda1 is a swap partition you
+   have. TuxOnIce will use the swap signature of this partition as a
+   pointer to your data when you hibernate. This means that (in this example)
+   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
+   is actually stored. It just needs to be a swap partition that has a
+   valid signature.
+
+   You don't need to have a swap partition for this purpose. TuxOnIce
+   can also use a swap file, but usage is a little more complex. Having made
+   your swap file, turn it on and do
+
+   cat /sys/power/tuxonice/swap/headerlocations
+
+   (this assumes you've already compiled your kernel with TuxOnIce
+   support and booted it). The results of the cat command will tell you
+   what you need to put in lilo.conf:
+
+   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
+   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
+
+   If the swapfile changes for any reason (it is moved to a different
+   location, it is deleted and recreated, or the filesystem is
+   defragmented) then you will have to check
+   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
+
+   Once you've compiled and installed the kernel and adjusted your bootloader
+   configuration, you should only need to reboot for the most basic part
+   of TuxOnIce to be ready.
+
+   If you only compile in the swap allocator, or only compile in the file
+   allocator, you don't need to add the "swap:" part of the resume=
+   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
+   have compiled both and your storage is on swap, you can also use this
+   format (the swap allocator is the default allocator).
+
+   When compiling your kernel, one of the options in the 'Power Management
+   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
+   called 'Default resume partition'. This can be used to set a default value
+   for the resume= parameter.
+
+   d. The hibernate script.
+
+   Since the driver model in 2.6 kernels is still being developed, you may need
+   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
+   process via a script which prepares for the hibernation cycle, tells the
+   kernel to do its stuff and then restore things afterwards. This script might
+   involve:
+
+   - Switching to a text console and back if X doesn't like the video card
+     status on resume.
+   - Un/reloading drivers that don't play well with hibernation.
+
+   Note that you might not be able to unload some drivers if there are
+   processes using them. You might have to kill off processes that hold
+   devices open. Hint: if your X server accesses a USB mouse, doing a
+   'chvt' to a text console releases the device and you can unload the
+   module.
+
+   Check out the latest script (available on tuxonice.net).
+
+   e. The userspace user interface.
+
+   TuxOnIce has very limited support for displaying status if you only apply
+   the kernel patch - it can printk messages, but that is all. In addition,
+   some of the functions mentioned in this document (such as cancelling a cycle
+   or performing interactive debugging) are unavailable. To utilise these
+   functions, or simply get a nice display, you need the 'userui' component.
+   Userui comes in three flavours: usplash, fbsplash and text. Text should
+   work on any console. Usplash and fbsplash require the appropriate
+   (distro specific?) support.
+
+   To utilise a userui, TuxOnIce just needs to be told where to find the
+   userspace binary:
+
+   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
+
+   The hibernate script can do this for you, and a default value for this
+   setting can be configured when compiling the kernel. This path is also
+   stored in the image header, so if you have an initrd or initramfs, you can
+   use the userui during the first part of resuming (prior to the atomic
+   restore) by putting the binary in the same path in your initrd/ramfs.
+   Alternatively, you can put it in a different location and do an echo
+   similar to the above prior to the echo > do_resume. The value saved in the
+   image header will then be ignored.
+
+4. Why not just use the version already in the kernel?
+
+   The version in the vanilla kernel has a number of drawbacks. The most
+   serious of these are:
+	- it has a maximum image size of 1/2 total memory;
+	- it doesn't allocate storage until after it has snapshotted memory.
+	  This means that you can't be sure hibernating will work until you
+	  see it start to write the image;
+	- it does not allow you to press escape to cancel a cycle;
+	- it does not allow you to press escape to cancel resuming;
+	- it does not allow you to automatically swapon a file when
+	  starting a cycle;
+	- it does not allow you to use multiple swap partitions or files;
+	- it does not allow you to use ordinary files;
+	- it just invalidates an image and continues to boot if you
+	  accidentally boot the wrong kernel after hibernating;
+	- it doesn't support any sort of nice display while hibernating;
+	- it is moving toward requiring that you have an initrd/initramfs
+	  to ever have a hope of resuming (uswsusp). While uswsusp will
+	  address some of the concerns above, it won't address all of them,
+          and will be more complicated to get set up;
+        - it doesn't have support for suspend-to-both (write a hibernation
+	  image, then suspend to ram; I think this is known as ReadySafe
+	  under M$).
+
+5. How do you use it?
+
+   A hibernation cycle can be started directly by doing:
+
+	echo > /sys/power/tuxonice/do_hibernate
+
+   In practice, though, you'll probably want to use the hibernate script
+   to unload modules, configure the kernel the way you like it and so on.
+   In that case, you'd do (as root):
+
+	hibernate
+
+   See the hibernate script's man page for more details on the options it
+   takes.
+
+   If you're using the text or splash user interface modules, one feature of
+   TuxOnIce that you might find useful is that you can press Escape at any time
+   during hibernating, and the process will be aborted.
+
+   Due to the way hibernation works, this means you'll have your system back and
+   perfectly usable almost instantly. The only exception is when it's at the
+   very end of writing the image. Then it will need to reload a small (usually
+   4-50MBs, depending upon the image characteristics) portion first.
+
+   Likewise, when resuming, you can press escape and resuming will be aborted.
+   The computer will then power down again (or reboot), according to the
+   powerdown/reboot settings in effect at that time.
+
+   You can change the settings for powering down while the image is being
+   written by pressing 'R' to toggle rebooting and 'O' to toggle between
+   suspending to ram and powering down completely.
+
+   If you run into problems with resuming, adding the "noresume" option to
+   the kernel command line will let you skip the resume step and recover your
+   system. This option shouldn't normally be needed, because TuxOnIce modifies
+   the image header prior to the atomic restore, and will thus prompt you
+   if it detects that you've tried to resume an image before (this flag is
+   removed if you press Escape to cancel a resume, so you won't be prompted
+   then).
+
+   Recent kernels (2.6.24 onwards) add support for resuming from a different
+   kernel to the one that was hibernated (thanks to Rafael for his work on
+   this - I've just embraced and enhanced the support for TuxOnIce). This
+   should further reduce the need for you to use the noresume option.
+
+6. What do all those entries in /sys/power/tuxonice do?
+
+   /sys/power/tuxonice is the directory which contains files you can use to
+   tune and configure TuxOnIce to your liking. The exact contents of
+   the directory will depend upon the version of TuxOnIce you're
+   running and the options you selected at compile time. In the following
+   descriptions, names in brackets refer to compile time options.
+   (Note that they're all dependent upon you having selected CONFIG_TUXONICE
+   in the first place!).
+
+   Since the values of these settings can open potential security risks, the
+   writeable ones are accessible only to the root user. You may want to
+   configure sudo to allow you to invoke your hibernate script as an ordinary
+   user.
+
+   - alloc/failure_test
+
+   This debugging option provides a way of testing TuxOnIce's handling of
+   memory allocation failures. Each allocation type that TuxOnIce makes has
+   been given a unique number (see the source code). Echo the appropriate
+   number into this entry, and when TuxOnIce attempts to do that allocation,
+   it will pretend there was a failure and act accordingly.
+
+   - alloc/find_max_mem_allocated
+
+   This debugging option will cause TuxOnIce to find the maximum amount of
+   memory it used during a cycle, and report that information in debugging
+   information at the end of the cycle.
+
+   - alt_resume_param
+
+   Instead of powering down after writing a hibernation image, TuxOnIce
+   supports resuming from a different image. This entry lets you set the
+   location of the signature for that image (the resume= value you'd use
+   for it). Using an alternate image and keep_image mode, you can do things
+   like using an alternate image to power down an uninterruptible power
+   supply.
+
+   - block_io/target_outstanding_io
+
+   This value controls the amount of memory that the block I/O code says it
+   needs when the core code is calculating how much memory is needed for
+   hibernating and for resuming. It doesn't directly control the amount of
+   I/O that is submitted at any one time - that depends on the amount of
+   available memory (we may have more available than we asked for), the
+   throughput that is being achieved and the ability of the CPU to keep up
+   with disk throughput (particularly where we're compressing pages).
+
+   - checksum/enabled
+
+   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
+   while we're saving the first part of the image, and to get any pages that
+   do change resaved in the atomic copy. This should normally not be needed,
+   but if you're seeing issues, please enable this. If your issues stop you
+   being able to resume, enable this option, hibernate and cancel the cycle
+   after the atomic copy is done. If the debugging info shows a non-zero
+   number of pages resaved, please report this to Nigel.
+
+   - compression/algorithm
+
+   Set the cryptoapi algorithm used for compressing the image.
+
+   - compression/expected_compression
+
+   This value allows you to set an expected compression ratio, which TuxOnIce
+   will use in calculating whether it meets constraints on the image size. If
+   this expected compression ratio is not attained, the hibernation cycle will
+   abort, so it is wise to allow some spare. You can see what compression
+   ratio is achieved in the logs after hibernating.
+
+   - debug_info:
+
+   This file returns information about your configuration that may be helpful
+   in diagnosing problems with hibernating.
+
+   - did_suspend_to_both:
+
+   This file can be used when you hibernate with powerdown method 3 (ie suspend
+   to ram after writing the image). There can be two outcomes in this case. We
+   can resume from the suspend-to-ram before the battery runs out, or we can run
+   out of juice and end up resuming like normal. This entry lets you find out,
+   post resume, which way we went. If the value is 1, we resumed from suspend
+   to ram. This can be useful when actions need to be run post suspend-to-ram
+   that don't need to be run if we did the normal resume from power off.
+
+   - do_hibernate:
+
+   When anything is written to this file, the kernel side of TuxOnIce will
+   begin to attempt to write an image to disk and power down. You'll normally
+   want to run the hibernate script instead, to get modules unloaded first.
+
+   - do_resume:
+
+   When anything is written to this file TuxOnIce will attempt to read and
+   restore an image. If there is no image, it will return almost immediately.
+   If an image exists, the echo > will never return. Instead, the original
+   kernel context will be restored and the original echo > do_hibernate will
+   return.
+
+   - */enabled
+
+   These options can be used to temporarily disable various parts of TuxOnIce.
+
+   - extra_pages_allowance
+
+   When TuxOnIce does its atomic copy, it calls the driver model suspend
+   and resume methods. If you have DRI enabled with a driver such as fglrx,
+   this can result in the driver allocating a substantial amount of memory
+   for storing its state. Extra_pages_allowance tells TuxOnIce how much
+   extra memory it should ensure is available for those allocations. If
+   your attempts at hibernating end with a message in dmesg indicating that
+   insufficient extra pages were allowed, you need to increase this value.
+
+   - file/target:
+
+   Read this value to get the current setting. Write to it to point TuxOnIce
+   at a new storage location for the file allocator. See section 3.b.ii above
+   for details of how to set up the file allocator.
+
+   - freezer_test
+
+   This entry can be used to get TuxOnIce to just test the freezer and prepare
+   an image without actually doing a hibernation cycle. It is useful for
+   diagnosing freezing and image preparation issues.
+
+   - full_pageset2
+
+   TuxOnIce divides the pages that are stored in an image into two sets. The
+   difference between the two sets is that pages in pageset 1 are atomically
+   copied, and pages in pageset 2 are written to disk without being copied
+   first. A page CAN be written to disk without being copied first if and only
+   if its contents will not be modified or used at any time after userspace
+   processes are frozen. A page MUST be in pageset 1 if its contents are
+   modified or used at any time after userspace processes have been frozen.
+
+   Normally (ie if this option is enabled), TuxOnIce will put all pages on the
+   per-zone LRUs in pageset2, then remove those pages used by any userspace
+   user interface helper and TuxOnIce storage manager that are running,
+   together with pages used by the GEM memory manager introduced around 2.6.28
+   kernels.
+
+   If this option is disabled, a much more conservative approach will be taken.
+   The only pages in pageset2 will be those belonging to userspace processes,
+   with the exclusion of those belonging to the TuxOnIce userspace helpers
+   mentioned above. This will result in a much smaller pageset2, and will
+   therefore result in smaller images than are possible with this option
+   enabled.
+
+   - ignore_rootfs
+
+   TuxOnIce records which device is mounted as the root filesystem when
+   writing the hibernation image. It will normally check at resume time that
+   this device isn't already mounted - that would be a cause of filesystem
+   corruption. In some particular cases (RAM based root filesystems), you
+   might want to disable this check. This option allows you to do that.
+
+   - image_exists:
+
+   Can be used in a script to determine whether a valid image exists at the
+   location currently pointed to by resume=. Returns up to three lines.
+   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
+   If an image exists, additional lines will return the machine and version.
+   Echoing anything to this entry removes any current image.
+
+   - image_size_limit:
+
+   The maximum size of hibernation image written to disk, measured in megabytes
+   (1024*1024).
+
+   - last_result:
+
+   The result of the last hibernation cycle, as defined in
+   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
+   SUSPEND_KEPT_IMAGE. This is a bitmask.
+
+   - late_cpu_hotplug:
+
+   This sysfs entry controls whether cpu hotplugging is done - as normal - just
+   before (unplug) and after (replug) the atomic copy/restore (so that all
+   CPUs/cores are available for multithreaded I/O). The alternative is to
+   unplug all secondary CPUs/cores at the start of hibernating/resuming, and
+   replug them at the end of resuming. No multithreaded I/O will be possible in
+   this configuration, but the odd machine has been reported to require it.
+
+   - lid_file:
+
+   This determines which ACPI button file we look in to determine whether the
+   lid is open or closed after resuming from suspend to disk or power off.
+   If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
+   and check its contents at the appropriate moment. See post_wake_state below
+   for more details on how this entry is used.
+
+   - log_everything (CONFIG_PM_DEBUG):
+
+   Setting this option results in all messages printed being logged. Normally,
+   only a subset are logged, so as to not slow the process and not clutter the
+   logs. Useful for debugging. It can be toggled during a cycle by pressing
+   'L'.
+
+   - no_load_direct:
+
+   This is a debugging option. If, when loading the atomically copied pages of
+   an image, TuxOnIce finds that the destination address for a page is free,
+   it will normally allocate the image, load the data directly into that
+   address and skip it in the atomic restore. If this option is disabled, the
+   page will be loaded somewhere else and atomically restored like other pages.
+
+   - no_flusher_thread:
+
+   When doing multithreaded I/O (see below), the first online CPU can be used
+   to _just_ submit compressed pages when writing the image, rather than
+   compressing and submitting data. This option is normally disabled, but has
+   been included because Nigel would like to see whether it will be more useful
+   as the number of cores/cpus in computers increases.
+
+   - no_multithreaded_io:
+
+   TuxOnIce will normally create one thread per cpu/core on your computer,
+   each of which will then perform I/O. This will generally result in
+   throughput that's the maximum the storage medium can handle. There
+   shouldn't be any reason to disable multithreaded I/O now, but this option
+   has been retained for debugging purposes.
+
+   - no_pageset2
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to do an atomic copy of all pages,
+   thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
+
+   - no_pageset2_if_unneeded
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
+   if and only if it isn't needed anyway. This option may still make TuxOnIce
+   less reliable because pageset2 pages are normally used to store the
+   atomic copy - drivers that want to do allocations of larger amounts of
+   memory in one shot will be more likely to find that those amounts aren't
+   available if this option is enabled.
+
+   - pause_between_steps (CONFIG_PM_DEBUG):
+
+   This option is used during debugging, to make TuxOnIce pause between
+   each step of the process. It is ignored when the nice display is on.
+
+   - post_wake_state:
+
+   TuxOnIce provides support for automatically waking after a user-selected
+   delay, and using a different powerdown method if the lid is still closed.
+   (Yes, we're assuming a laptop).  This entry lets you choose what state
+   should be entered next. The values are those described under
+   powerdown_method, below. It can be used to suspend to RAM after hibernating,
+   then power down properly after (say) 20 minutes. It can also be used to power down
+   properly, then wake at (say) 6.30am and suspend to RAM until you're ready
+   to use the machine.
+
+   - powerdown_method:
+
+   Used to select a method by which TuxOnIce should powerdown after writing the
+   image. Currently:
+
+   0: Don't use ACPI to power off.
+   3: Attempt to enter Suspend-to-ram.
+   4: Attempt to enter ACPI S4 mode.
+   5: Attempt to power down via ACPI S5 mode.
+
+   Note that these options are highly dependent upon your hardware & software:
+
+   3: When successful, your machine suspends to ram instead of powering off.
+      The advantage of using this mode is that it doesn't matter whether your
+      battery has enough charge to make it through to your next resume. If it
+      lasts, you will simply resume from suspend to ram (and the image on disk
+      will be discarded). If the battery runs out, you will resume from disk
+      instead. The disadvantage is that it takes longer than a normal
+      suspend-to-ram to enter the state, since the suspend-to-disk image needs
+      to be written first.
+   4/5: When successful, your machine will be off and consume (almost) no power.
+      But it might still react to some external events like opening the lid or
+      traffic on a network or USB device. For the bios, resume is then the same
+      as warm boot, similar to a situation where you used the command `reboot'
+      to reboot your machine. If your machine has problems on warm boot or if
+      you want to protect your machine with the bios password, this is probably
+      not the right choice. Mode 4 may be necessary on some machines where ACPI
+      wake up methods need to be run to properly reinitialise hardware after a
+      hibernation cycle.
+   0: Switch the machine completely off. The only possible wakeup is the power
+      button. For the bios, resume is then the same as a cold boot, in
+      particular you would have to provide your bios boot password if your
+      machine uses that feature for booting.
+
+   - progressbar_granularity_limit:
+
+   This option can be used to limit the granularity of the progress bar
+   displayed with a bootsplash screen. The value is the maximum number of
+   steps. That is, 10 will make the progress bar jump in 10% increments.
+
+   - reboot:
+
+   This option causes TuxOnIce to reboot rather than powering down
+   at the end of saving an image. It can be toggled during a cycle by pressing
+   'R'.
+
+   - resume:
+
+   This sysfs entry can be used to read and set the location in which TuxOnIce
+   will look for the signature of an image - the value set using resume= at
+   boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
+   writing to this file as well as modifying your bootloader's configuration
+   file (eg menu.lst), you can set or reset the location of your image or the
+   method of storing the image without rebooting.
+
+   - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
+
+   This option makes
+
+     echo disk > /sys/power/state
+
+   activate TuxOnIce instead of swsusp. Regardless of whether this option is
+   enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
+   to check for an image too. This is due to the fact that at resume time, we
+   can't know whether this option was enabled until we see if an image is there
+   for us to resume from. (And when an image exists, we don't care whether we
+   did replace swsusp anyway - we just want to resume).
+
+   - resume_commandline:
+
+   This entry can be read after resuming to see the commandline that was used
+   when resuming began. You might use this to set up two bootloader entries
+   that are the same apart from the fact that one includes a extra append=
+   argument "at_work=1". You could then grep resume_commandline in your
+   post-resume scripts and configure networking (for example) differently
+   depending upon whether you're at home or work. resume_commandline can be
+   set to arbitrary text if you wish to remove sensitive contents.
+
+   - swap/swapfilename:
+
+   This entry is used to specify the swapfile or partition that
+   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
+   I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically
+   for my hibernation image, I would
+
+   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
+
+   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
+   swapon and swapoff occur while other processes are frozen (including kswapd)
+   so this swap file will not be used up when attempting to free memory. The
+   partition/file is also given the highest priority, so other swapfiles/partitions
+   will only be used to save the image when this one is filled.
+
+   The value of this file is used by headerlocations along with any currently
+   activated swapfiles/partitions.
+
+   - swap/headerlocations:
+
+   This option tells you the resume= options to use for swap devices you
+   currently have activated. It is particularly useful when you only want to
+   use a swap file to store your image. See above for further details.
+
+   - test_bio
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will skip the atomic copy,
+   just doing the writing of the image and then returning control to the
+   user at the point where it would have powered off. This is useful for
+   testing throughput in different configurations.
+
+   - test_filter_speed
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will not write anything or do
+   an atomic copy, but will only run any enabled compression algorithm on the
+   data that would have been written (the source pages of the atomic copy in
+   the case of pageset 1). This is useful for comparing the performance of
+   compression algorithms and for determining the extent to which an upgrade
+   to your storage method would improve hibernation speed.
+
+   - user_interface/debug_sections (CONFIG_PM_DEBUG):
+
+   This value, together with the console log level, controls what debugging
+   information is displayed. The console log level determines the level of
+   detail, and this value determines what detail is displayed. This value is
+   a bit vector, and the meaning of the bits can be found in the kernel tree
+   in include/linux/tuxonice.h. It can be overridden using the kernel's
+   command line option suspend_dbg.
+
+   - user_interface/default_console_level (CONFIG_PM_DEBUG):
+
+   This determines the value of the console log level at the start of a
+   hibernation cycle. If debugging is compiled in, the console log level can be
+   changed during a cycle by pressing the digit keys. Meanings are:
+
+   0: Nice display.
+   1: Nice display plus numerical progress.
+   2: Errors only.
+   3: Low level debugging info.
+   4: Medium level debugging info.
+   5: High level debugging info.
+   6: Verbose debugging info.
+
+   - user_interface/enable_escape:
+
+   Setting this to "1" will enable you to abort a hibernation cycle or resume by
+   pressing escape; "0" (default) disables this feature. Note that enabling
+   this option means that you cannot initiate a hibernation cycle and then walk
+   away from your computer, expecting it to be secure. With the feature disabled,
+   you can validly have this expectation once TuxOnIce begins to write the
+   image to disk. (Prior to this point, it is possible that TuxOnIce might
+   abort because of failure to freeze all processes or because constraints
+   on its ability to save the image are not met).
+
+   - user_interface/program:
+
+   This entry is used to tell TuxOnIce what userspace program to use for
+   providing a user interface while hibernating. The program uses a netlink
+   socket to pass messages back and forth to the kernel, allowing it to provide
+   all of the functions formerly implemented in the kernel user interface
+   components.
+
+   - version:
+
+   The version of TuxOnIce you have compiled into the currently running kernel.
+
+   - wake_alarm_dir:
+
+   As mentioned above (post_wake_state), TuxOnIce supports automatically waking
+   after some delay. This entry allows you to select which wake alarm to use.
+   It should contain the value "rtc0" if you want to use
+   /sys/class/rtc/rtc0.
+
+   - wake_delay:
+
+   This value determines the delay from the end of writing the image until the
+   wake alarm is triggered. You can set an absolute time by writing the desired
+   time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
+   empty.
+
+   Note that for the wakeup to actually occur, you may need to modify entries
+   in /proc/acpi/wakeup. This is done by echoing the name of the button in the
+   first column (e.g. PBTN) into the file.
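+
+   For example, to select rtc0 and arm a wake alarm after the image is written
+   (the delay value is illustrative):
+
+   echo rtc0 > /sys/power/tuxonice/wake_alarm_dir
+   echo 60 > /sys/power/tuxonice/wake_delay
+   echo PBTN > /proc/acpi/wakeup
+
+   The last command toggles the wakeup setting for the PBTN entry, as described
+   above; check /proc/acpi/wakeup afterwards to confirm that entry is enabled.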
+
+7. How do you get support?
+
+   Glad you asked. TuxOnIce is being actively maintained and supported
+   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
+   (who maintains the hibernate script and userspace user interface components)
+   and its users.
+
+   Resources available include HowTos, FAQs and a Wiki, all accessible via
+   tuxonice.net.  You can find the mailing lists there.
+
+8. I think I've found a bug. What should I do?
+
+   By far and away, the most common problems people have with TuxOnIce
+   relate to drivers not having adequate power management support. In this
+   case, it is not a bug with TuxOnIce, but we can still help you. As we
+   mentioned above, such issues can usually be worked around by building the
+   functionality as modules and unloading them while hibernating. Please visit
+   the Wiki for up-to-date lists of known issues and workarounds.
+
+   If this information doesn't help, try running:
+
+   hibernate --bug-report
+
+   ...and sending the output to the users mailing list.
+
+   Good guidance on how to provide us with useful information from an
+   oops is found in the file REPORTING-BUGS, in the top level directory
+   of the kernel tree. If you get an oops, please especially note the
+   information about running what is printed on the screen through ksymoops.
+   The raw information is useless.
+
+9. When will XXX be supported?
+
+   If there's a feature missing from TuxOnIce that you'd like, feel free to
+   ask. We try to be obliging, within reason.
+
+   Patches are welcome. Please send to the list.
+
+10. How does it work?
+
+   TuxOnIce does its work in a number of steps.
+
+   a. Freezing system activity.
+
+   The first main stage in hibernating is to stop all other activity. This is
+   achieved in stages. Processes are considered in four groups, which we will
+   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
+   flag, kernel threads without this flag, userspace processes with the
+   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
+   untouched by the refrigerator code. They are allowed to run during hibernating
+   and resuming, and are used to support user interaction, storage access or the
+   like. Other kernel threads (those unneeded while hibernating) are frozen last.
+   This leaves us with userspace processes that need to be frozen. When a
+   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
+   that process for the duration of that call. Processes that have this flag are
+   frozen after processes without it, so that we can seek to ensure that dirty
+   data is synced to disk as quickly as possible in a situation where other
+   processes may be submitting writes at the same time. Freezing the processes
+   that are submitting data stops new I/O from being submitted. Syncthreads can
+   then cleanly finish their work. So the order is:
+
+   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
+   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
+   - Kernel processes without PF_NOFREEZE.
+
+   b. Eating memory.
+
+   For a successful hibernation cycle, you need enough disk space to store the
+   image and enough memory to satisfy the various constraints of TuxOnIce's
+   algorithm. You can also specify a maximum image size. In order to meet
+   those constraints, TuxOnIce may 'eat' memory. If, after freezing
+   processes, the constraints aren't met, TuxOnIce will thaw all the
+   other processes and begin to eat memory until its calculations indicate
+   the constraints are met. It will then freeze processes again and recheck
+   its calculations.
+
+   c. Allocation of storage.
+
+   Next, TuxOnIce allocates the storage that will be used to save
+   the image.
+
+   The core of TuxOnIce knows nothing about how or where pages are stored. We
+   therefore request the active allocator (remember you might have compiled in
+   more than one!) to allocate enough storage for our expected image size. If
+   this request cannot be fulfilled, we eat more memory and try again. If it
+   is fulfilled, we seek to allocate additional storage, just in case our
+   expected compression ratio (if any) isn't achieved. This time, however, we
+   just continue if we can't allocate enough storage.
+
+   If these calls to our allocator change the characteristics of the image
+   such that we haven't allocated enough memory, we also loop. (The allocator
+   may well need to allocate space for its storage information).
+
+   d. Write the first part of the image.
+
+   TuxOnIce stores the image in two sets of pages called 'pagesets'.
+   Pageset 2 contains pages on the active and inactive lists; essentially
+   the page cache. Pageset 1 contains all other pages, including the kernel.
+   We use two pagesets for one important reason: We need to make an atomic copy
+   of the kernel to ensure consistency of the image. Without a second pageset,
+   that would limit us to an image that was at most half the amount of memory
+   available. Using two pagesets allows us to store a full image. Since pageset
+   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
+   We can then make our atomic copy of the remaining pages using both pageset 2
+   pages and any other pages that are free. While saving both pagesets, we are
+   careful not to corrupt the image. Among other things, we use low-level block
+   I/O routines that don't change the pagecache contents.
+
+   The next step, then, is writing pageset 2.
+
+   e. Suspending drivers and storing processor context.
+
+   Having written pageset 2, TuxOnIce calls the power management functions to
+   notify drivers of the hibernation, and saves the processor state in preparation
+   for the atomic copy of memory we are about to make.
+
+   f. Atomic copy.
+
+   At this stage, everything else but the TuxOnIce code is halted. Processes
+   are frozen or idling, drivers are quiesced and have stored (ideally and where
+   necessary) their configuration in memory we are about to atomically copy.
+   In our low-level, architecture-specific code, we have saved the CPU state.
+   We can therefore now do our atomic copy before resuming drivers etc.
+
+   g. Save the atomic copy (pageset 1).
+
+   TuxOnIce can then write the atomic copy of the remaining pages. Since we
+   have copied the pages into other locations, we can continue to use the
+   normal block I/O routines without fear of corrupting our image.
+
+   h. Save the image header.
+
+   Nearly there! We save our settings and other parameters needed for
+   reloading pageset 1 in an 'image header'. We also tell our allocator to
+   serialise its data at this stage, so that it can reread the image at resume
+   time.
+
+   i. Set the image header.
+
+   Finally, we edit the header at our resume= location. The signature is
+   changed by the allocator to reflect the fact that an image exists, and to
+   point to the start of that data if necessary (swap allocator).
+
+   j. Power down.
+
+   Or reboot if we're debugging and the appropriate option is selected.
+
+   Whew!
+
+   Reloading the image.
+   --------------------
+
+   Reloading the image is essentially the reverse of all the above. We load
+   our copy of pageset 1, being careful to choose locations that aren't going
+   to be overwritten as we copy it back (We start very early in the boot
+   process, so there are no other processes to quiesce here). We then copy
+   pageset 1 back to its original location in memory and restore the process
+   context. We are now running with the original kernel. Next, we reload the
+   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
+   the pageset header and restart processes. Sounds easy in comparison to
+   hibernating, doesn't it!
+
+   There is of course more to TuxOnIce than this, but this explanation
+   should be a good start. If there's interest, I'll write further
+   documentation on range pages and the low level I/O.
+
+11. Who wrote TuxOnIce?
+
+   (Answer based on the writings of Florent Chabaud, credits in files and
+   Nigel's limited knowledge; apologies to anyone missed out!)
+
+   The main developers of TuxOnIce have been...
+
+   Gabor Kuti
+   Pavel Machek
+   Florent Chabaud
+   Bernard Blackham
+   Nigel Cunningham
+
+   Significant portions of swsusp, the code in the vanilla kernel which
+   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
+   also be expressed to him.
+
+   The above mentioned developers have been aided in their efforts by a host
+   of hundreds, if not thousands of testers and people who have submitted bug
+   fixes & suggestions. Of special note are the efforts of Michael Frank, who
+   had his computers repeatedly hibernate and resume for literally tens of
+   thousands of cycles and developed scripts to stress the system and test
+   TuxOnIce far beyond the point most of us (Nigel included!) would consider
+   testing. His efforts have contributed as much to TuxOnIce as any of the
+   names above.
diff -Nur linux-4.2/Documentation/scsi/link_power_management_policy.txt linux-4.2-pck/Documentation/scsi/link_power_management_policy.txt
--- linux-4.2/Documentation/scsi/link_power_management_policy.txt	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/Documentation/scsi/link_power_management_policy.txt	2015-09-08 00:48:22.211698004 -0300
@@ -1,8 +1,11 @@
 This parameter allows the user to set the link (interface) power management.
-There are 3 possible options:
+There are 4 possible options:
 
 Value			Effect
 ----------------------------------------------------------------------------
+firmware_defaults	Inherit configuration from the state programmed by
+			the firmware during system init.
+
 min_power		Tell the controller to try to make the link use the
 			least possible power when possible.  This may
 			sacrifice some performance due to increased latency
diff -Nur linux-4.2/Documentation/tp_smapi.txt linux-4.2-pck/Documentation/tp_smapi.txt
--- linux-4.2/Documentation/tp_smapi.txt	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/tp_smapi.txt	2015-09-08 00:48:22.211698004 -0300
@@ -0,0 +1,267 @@
+tp_smapi version 0.40
+IBM ThinkPad hardware functions driver
+
+Author:  Shem Multinymous <multinymous@gmail.com>
+Project: http://sourceforge.net/projects/tpctl
+Wiki:    http://thinkwiki.org/wiki/tp_smapi
+List:    linux-thinkpad@linux-thinkpad.org
+         (http://mailman.linux-thinkpad.org/mailman/listinfo/linux-thinkpad)
+
+Description
+-----------
+
+ThinkPad laptops include a proprietary interface called SMAPI BIOS
+(System Management Application Program Interface) which provides some
+hardware control functionality that is not accessible by other means.
+
+This driver exposes some features of the SMAPI BIOS through a sysfs
+interface. It is suitable for newer models, on which SMAPI is invoked
+through IO port writes. Older models use a different SMAPI interface;
+for those, try the "thinkpad" module from the "tpctl" package.
+
+WARNING:
+This driver uses undocumented features and direct hardware access.
+It thus cannot be guaranteed to work, and may cause arbitrary damage
+(especially on models it wasn't tested on).
+
+
+Module parameters
+-----------------
+
+thinkpad_ec module:
+  force_io=1 lets thinkpad_ec load on some recent ThinkPad models
+  (e.g., T400 and T500) whose BIOS's ACPI DSDT reserves the ports we need.
+tp_smapi module:
+  debug=1    enables verbose dmesg output.
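+
+For example (illustrative; force_io is only needed on the models noted above):
+
+# modprobe thinkpad_ec force_io=1
+# modprobe tp_smapi debug=1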
+
+
+Usage
+-----
+
+Control of battery charging thresholds (in percent of current full charge
+capacity):
+
+# echo 40 > /sys/devices/platform/smapi/BAT0/start_charge_thresh
+# echo 70 > /sys/devices/platform/smapi/BAT0/stop_charge_thresh
+# cat /sys/devices/platform/smapi/BAT0/*_charge_thresh
+
+    (This is useful since Li-Ion batteries wear out much faster at very
+     high or low charge levels. The driver will also keep the thresholds
+     across suspend-to-disk with AC disconnected; this isn't done
+     automatically by the hardware.)
+
+Inhibiting battery charging for 17 minutes (overrides thresholds):
+
+# echo 17 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+# echo 0  > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes  # stop
+# cat /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+
+    (This can be used to control which battery is charged when using an
+     Ultrabay battery.)
+
+Forcing battery discharging even if AC power available:
+
+# echo 1 > /sys/devices/platform/smapi/BAT0/force_discharge  # start discharge
+# echo 0 > /sys/devices/platform/smapi/BAT0/force_discharge  # stop discharge
+# cat /sys/devices/platform/smapi/BAT0/force_discharge
+
+    (When AC is connected, forced discharging will automatically stop
+     when battery is fully depleted -- this is useful for calibration.
+     Also, this attribute can be used to control which battery is discharged
+     when both a system battery and an Ultrabay battery are connected.)
+
+Misc read-only battery status attributes (see note about HDAPS below):
+
+/sys/devices/platform/smapi/BAT0/installed   # 0 or 1
+/sys/devices/platform/smapi/BAT0/state       # idle/charging/discharging
+/sys/devices/platform/smapi/BAT0/cycle_count # integer counter
+/sys/devices/platform/smapi/BAT0/current_now # instantaneous current
+/sys/devices/platform/smapi/BAT0/current_avg # last minute average
+/sys/devices/platform/smapi/BAT0/power_now   # instantaneous power
+/sys/devices/platform/smapi/BAT0/power_avg   # last minute average
+/sys/devices/platform/smapi/BAT0/last_full_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/remaining_percent          # remaining percent of energy (set by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_percent_error    # error range of remaing_percent (not reset by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_running_time     # in minutes, by last minute average power
+/sys/devices/platform/smapi/BAT0/remaining_running_time_now # in minutes, by instantenous power
+/sys/devices/platform/smapi/BAT0/remaining_charging_time    # in minutes
+/sys/devices/platform/smapi/BAT0/remaining_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/design_capacity            # in mWh
+/sys/devices/platform/smapi/BAT0/voltage           # in mV
+/sys/devices/platform/smapi/BAT0/design_voltage    # in mV
+/sys/devices/platform/smapi/BAT0/charging_max_current  # max charging current
+/sys/devices/platform/smapi/BAT0/charging_max_voltage  # max charging voltage
+/sys/devices/platform/smapi/BAT0/group{0,1,2,3}_voltage # see below
+/sys/devices/platform/smapi/BAT0/manufacturer      # string
+/sys/devices/platform/smapi/BAT0/model             # string
+/sys/devices/platform/smapi/BAT0/barcoding         # string
+/sys/devices/platform/smapi/BAT0/chemistry         # string
+/sys/devices/platform/smapi/BAT0/serial            # integer
+/sys/devices/platform/smapi/BAT0/manufacture_date  # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/first_use_date    # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/temperature  # in milli-Celsius
+/sys/devices/platform/smapi/BAT0/dump         # see below
+/sys/devices/platform/smapi/ac_connected      # 0 or 1
+
+The BAT0/group{0,1,2,3}_voltage attribute refers to the separate cell groups
+in each battery. For example, on the ThinkPad 600, X3x, T4x and R5x models,
+the battery contains 3 cell groups in series, where each group consists of 2
+or 3 cells connected in parallel. The voltage of each group is given by these
+attributes, and their sum (roughly) equals the "voltage" attribute.
+(The effective performance of the battery is determined by the weakest group,
+i.e., the one whose voltage changes most rapidly during dis/charging.)
+
+The "BAT0/dump" attribute gives a hex dump of the raw status data, which
+contains additional data not in the above (if you can figure it out). Some
+unused values are autodetected and replaced by "--".
+
+In all of the above, replace BAT0 with BAT1 to address the 2nd battery (e.g.
+in the UltraBay).
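+
+For example, a quick look at the first battery (output values will vary):
+
+# cat /sys/devices/platform/smapi/BAT0/state
+# cat /sys/devices/platform/smapi/BAT0/remaining_percent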
+
+
+Raw SMAPI calls:
+
+/sys/devices/platform/smapi/smapi_request
+This performs raw SMAPI calls. It uses a bad interface that cannot handle
+multiple simultaneous accesses. Don't touch it, it's for development only.
+If you did touch it, you would do something like
+# echo '211a 100 0 0' > /sys/devices/platform/smapi/smapi_request
+# cat /sys/devices/platform/smapi/smapi_request
+and notice that in the output "211a 34b b2 0 0 0 'OK'", the "4b" in the 2nd
+value, converted to decimal, is 75: the current charge stop threshold.
+
+
+Model-specific status
+---------------------
+
+Works (at least partially) on the following ThinkPad models:
+* A30
+* G41
+* R40, R50p, R51, R52
+* T23, T40, T40p, T41, T41p, T42, T42p, T43, T43p, T60
+* X24, X31, X32, X40, X41, X60
+* Z60t, Z61m
+
+Not all functions are available on all models; for detailed status, see:
+  http://thinkwiki.org/wiki/tp_smapi
+
+Please report success/failure by e-mail or on the Wiki.
+If you get a "not implemented" or "not supported" message, your laptop
+probably just can't do that (at least not via the SMAPI BIOS).
+For negative reports, follow the bug reporting guidelines below.
+If you send me the necessary technical data (i.e., SMAPI function
+interfaces), I will support additional models.
+
+
+Additional HDAPS features
+-------------------------
+
+The modified hdaps driver has several improvements over the one in mainline
+(beyond resolving the conflict with thinkpad_ec and tp_smapi):
+
+- Fixes reliability and improves support for recent ThinkPad models
+  (especially *60 and newer). Unlike the mainline driver, the modified hdaps
+  correctly follows the Embedded Controller communication protocol.
+
+- Extends the "invert" parameter to cover all possible axis orientations.
+  The possible values are as follows.
+  Let X,Y denote the hardware readouts.
+  Let R denote the laptop's roll (tilt left/right).
+  Let P denote the laptop's pitch (tilt forward/backward).
+    invert=0:   R= X  P= Y   (same as mainline)
+    invert=1:   R=-X  P=-Y   (same as mainline)
+    invert=2:   R=-X  P= Y   (new)
+    invert=3:   R= X  P=-Y   (new)
+    invert=4:   R= Y  P= X   (new)
+    invert=5:   R=-Y  P=-X   (new)
+    invert=6:   R=-Y  P= X   (new)
+    invert=7:   R= Y  P=-X   (new)
+  It's probably easiest to just try all 8 possibilities and see which yields
+  correct results (e.g., in the hdaps-gl visualisation).
+
+- Adds a whitelist which automatically sets the correct axis orientation for
+  some models. If the value for your model is wrong or missing, you can override
+  it using the "invert" parameter. Please also update the tables at
+  http://www.thinkwiki.org/wiki/tp_smapi and
+  http://www.thinkwiki.org/wiki/List_of_DMI_IDs
+  and submit a patch for the whitelist in hdaps.c.
+
+- Provides new attributes:
+  /sys/devices/platform/hdaps/sampling_rate:
+    This determines the frequency at which the host queries the embedded
+    controller for accelerometer data (and informs the hdaps input devices).
+    Default=50.
+  /sys/devices/platform/hdaps/oversampling_ratio:
+    When set to X, the embedded controller is told to do physical accelerometer
+    measurements at a rate that is X times higher than the rate at which
+    the driver reads those measurements (i.e., X*sampling_rate). This
+    makes the readouts from the embedded controller fresher, and is also
+    useful for the running average filter (see next). Default=5.
+  /sys/devices/platform/hdaps/running_avg_filter_order:
+    When set to X, reported readouts will be the average of the last X physical
+    accelerometer measurements. Current firmware allows 1<=X<=8. Setting to a
+    high value decreases readout fluctuations. The averaging is handled by the
+    embedded controller, so no CPU resources are used. Higher values make the
+    readouts smoother, since it averages out both sensor noise (good) and abrupt
+    changes (bad). Default=2.
+
+- Provides a second input device, which publishes the raw accelerometer
+  measurements (without the fuzzing needed for joystick emulation). This input
+  device can be matched by a udev rule such as the following (all on one line):
+    KERNEL=="event[0-9]*", ATTRS{phys}=="hdaps/input1",
+    ATTRS{modalias}=="input:b0019v1014p5054e4801-*",
+    SYMLINK+="input/hdaps/accelerometer-event"
+
+A new version of the hdapsd userspace daemon, which uses the input device
+interface instead of polling sysfs, is available separately. Using this reduces
+the total interrupts per second generated by hdaps+hdapsd (on tickless kernels)
+to 50, down from a value that fluctuates between 50 and 100. Set the
+sampling_rate sysfs attribute to a lower value to further reduce interrupts,
+at the expense of response latency.
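+
+For example, to trade a little responsiveness for fewer wakeups, you could
+halve the default query rate (the value 25 is only illustrative):
+
+# echo 25 > /sys/devices/platform/hdaps/sampling_rate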
+
+Licensing note: all my changes to the HDAPS driver are licensed under the
+GPL version 2 or, at your option and to the extent allowed by derivation from
+prior works, any later version. My version of hdaps is derived work from the
+mainline version, which at the time of writing is available only under
+GPL version 2.
+
+Bug reporting
+-------------
+
+Mail <multinymous@gmail.com>. Please include:
+* Details about your model,
+* Relevant "dmesg" output. Make sure thinkpad_ec and tp_smapi are loaded with
+  the "debug=1" parameter (e.g., use "make load HDAPS=1 DEBUG=1").
+* Output of "dmidecode | grep -C5 Product"
+* Does the failed functionality work under Windows?
+
+
+More about SMAPI
+----------------
+
+For hints about what may be possible via the SMAPI BIOS and how, see:
+
+* IBM Technical Reference Manual for the ThinkPad 770
+  (http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD)
+* Exported symbols in PWRMGRIF.DLL or TPPWRW32.DLL (e.g., use "objdump -x").
+* drivers/char/mwave/smapi.c in the Linux kernel tree.
+* The "thinkpad" SMAPI module (http://tpctl.sourceforge.net).
+* The SMAPI_* constants in tp_smapi.c.
+
+Note that in the above Technical Reference and in the "thinkpad" module,
+SMAPI is invoked through a function call to some physical address. However,
+the interface used by tp_smapi and the above mwave driver, and apparently
+required by newer ThinkPads, is different: you set the parameters up in the
+CPU's registers and write to ports 0xB2 (the APM control port) and 0x4F; this
+triggers an SMI (System Management Interrupt), causing the CPU to enter
+SMM (System Management Mode) and run the BIOS firmware; the results are
+returned in the CPU's registers. It is not clear what the relation is between
+the two variants of SMAPI, though the assignment of error codes seems to be
+similar.
+
+In addition, the embedded controller on ThinkPad laptops has a non-standard
+interface at IO ports 0x1600-0x161F (mapped to LPC channel 3 of the H8S chip).
+The interface provides various system management services (currently known:
+battery information and accelerometer readouts). For more information see the
+thinkpad_ec module and the H8S hardware documentation:
+http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
diff -Nur linux-4.2/Documentation/vm/00-INDEX linux-4.2-pck/Documentation/vm/00-INDEX
--- linux-4.2/Documentation/vm/00-INDEX	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/Documentation/vm/00-INDEX	2015-09-08 00:51:03.460668141 -0300
@@ -16,6 +16,8 @@
 	- explains what hwpoison is
 ksm.txt
 	- how to use the Kernel Samepage Merging feature.
+uksm.txt
+	- Introduction to Ultra KSM
 numa
 	- information about NUMA specific code in the Linux vm.
 numa_memory_policy.txt
diff -Nur linux-4.2/Documentation/vm/uksm.txt linux-4.2-pck/Documentation/vm/uksm.txt
--- linux-4.2/Documentation/vm/uksm.txt	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/Documentation/vm/uksm.txt	2015-09-08 00:51:03.460668141 -0300
@@ -0,0 +1,59 @@
+The Ultra Kernel Samepage Merging feature
+----------------------------------------------
+/*
+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
+ *
+ * This is an improvement upon KSM. Some basic data structures and routines
+ * are borrowed from ksm.c .
+ *
+ * Its new features:
+ * 1. Full system scan:
+ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
+ *      interaction to submit a memory area to KSM is no longer needed.
+ *
+ * 2. Rich area detection:
+ *      It automatically detects rich areas containing abundant duplicated
+ *      pages. Rich areas are given a full scan speed. Poor areas are
+ *      sampled at a reasonable speed with very low CPU consumption.
+ *
+ * 3. Ultra Per-page scan speed improvement:
+ *      A new hash algorithm is proposed. As a result, on a machine with
+ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
+ *      can scan memory areas that does not contain duplicated pages at speed of
+ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of
+ *      477MB/sec ~ 923MB/sec.
+ *
+ * 4. Thrashing area avoidance:
+ *      Thrashing areas (VMAs that have frequent KSM page break-outs) can be
+ *      filtered out. My benchmark shows it's more efficient than KSM's per-page
+ *      hash value based volatile page detection.
+ *
+ *
+ * 5. Misc changes upon KSM:
+ *      * It has a fully x86-optimized memcmp dedicated for 4-byte-aligned page
+ *        comparison. It's much faster than the default C version on x86.
+ *      * rmap_item now has a struct page * member to loosely cache an
+ *        address-->page mapping, which avoids many time-costly calls to
+ *        follow_page().
+ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
+ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
+ *        ksm is needed for this case.
+ *
+ * 6. Full Zero Page consideration (contributed by Figo Zhang)
+ *    Now uksmd considers full zero pages as special pages and merges them into
+ *    a special unswappable uksm zero page.
+ */
+
+ChangeLog:
+
+2012-05-05 The creation of this Doc
+2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
+2012-05-28 UKSM 0.1.1.2 bug fix release
+2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
+2012-07-02 UKSM 0.1.2-beta2
+2012-07-10 UKSM 0.1.2-beta3
+2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
+2012-10-13 UKSM 0.1.2.1 Bug fixes.
+2012-12-31 UKSM 0.1.2.2 Minor bug fixes.
+2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug".
+2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger annoying warnings.
diff -Nur linux-4.2/MAINTAINERS linux-4.2-pck/MAINTAINERS
--- linux-4.2/MAINTAINERS	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/MAINTAINERS	2015-09-08 11:20:31.817043710 -0300
@@ -5807,6 +5807,19 @@
 F:	Documentation/kbuild/kconfig-language.txt
 F:	scripts/kconfig/
 
+KDBUS
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+M:	Daniel Mack <daniel@zonque.org>
+M:	David Herrmann <dh.herrmann@googlemail.com>
+M:	Djalal Harouni <tixxdz@opendz.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	ipc/kdbus/*
+F:	samples/kdbus/*
+F:	Documentation/kdbus/*
+F:	include/uapi/linux/kdbus.h
+F:	tools/testing/selftests/kdbus/
+
 KDUMP
 M:	Vivek Goyal <vgoyal@redhat.com>
 M:	Haren Myneni <hbabu@us.ibm.com>
@@ -10464,6 +10477,13 @@
 F:	drivers/tc/
 F:	include/linux/tc.h
 
+TUXONICE (ENHANCED HIBERNATION)
+P:	Nigel Cunningham
+M:	nigel@nigelcunningham.com.au
+L:	tuxonice-devel@tuxonice.net
+W:	http://tuxonice.net
+S:	Maintained
+
 U14-34F SCSI DRIVER
 M:	Dario Ballabio <ballabio_dario@emc.com>
 L:	linux-scsi@vger.kernel.org
diff -Nur linux-4.2/arch/x86/Kconfig.cpu linux-4.2-pck/arch/x86/Kconfig.cpu
--- linux-4.2/arch/x86/Kconfig.cpu	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/Kconfig.cpu	2015-09-08 00:50:52.507860491 -0300
@@ -137,9 +137,8 @@
 		-Paxville
 		-Dempsey
 
-
 config MK6
-	bool "K6/K6-II/K6-III"
+	bool "AMD K6/K6-II/K6-III"
 	depends on X86_32
 	---help---
 	  Select this for an AMD K6-family processor.  Enables use of
@@ -147,7 +146,7 @@
 	  flags to GCC.
 
 config MK7
-	bool "Athlon/Duron/K7"
+	bool "AMD Athlon/Duron/K7"
 	depends on X86_32
 	---help---
 	  Select this for an AMD Athlon K7-family processor.  Enables use of
@@ -155,12 +154,62 @@
 	  flags to GCC.
 
 config MK8
-	bool "Opteron/Athlon64/Hammer/K8"
+	bool "AMD Opteron/Athlon64/Hammer/K8"
 	---help---
 	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
 	  Enables use of some extended instructions, and passes appropriate
 	  optimization flags to GCC.
 
+config MK8SSE3
+	bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
+	---help---
+	  Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
+	  Enables use of some extended instructions, and passes appropriate
+	  optimization flags to GCC.
+
+config MK10
+	bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
+	---help---
+	  Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
+		Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
+	  Enables use of some extended instructions, and passes appropriate
+	  optimization flags to GCC.
+
+config MBARCELONA
+	bool "AMD Barcelona"
+	---help---
+	  Select this for AMD Barcelona and newer processors.
+
+	  Enables -march=barcelona
+
+config MBOBCAT
+	bool "AMD Bobcat"
+	---help---
+	  Select this for AMD Bobcat processors.
+
+	  Enables -march=btver1
+
+config MBULLDOZER
+	bool "AMD Bulldozer"
+	---help---
+	  Select this for AMD Bulldozer processors.
+
+	  Enables -march=bdver1
+
+config MPILEDRIVER
+	bool "AMD Piledriver"
+	---help---
+	  Select this for AMD Piledriver processors.
+
+	  Enables -march=bdver2
+
+config MJAGUAR
+	bool "AMD Jaguar"
+	---help---
+	  Select this for AMD Jaguar processors.
+
+	  Enables -march=btver2
+
 config MCRUSOE
 	bool "Crusoe"
 	depends on X86_32
@@ -251,8 +300,17 @@
 	  using the cpu family field
 	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
 
+config MATOM
+	bool "Intel Atom"
+	---help---
+
+	  Select this for the Intel Atom platform. Intel Atom CPUs have an
+	  in-order pipelining architecture and thus can benefit from
+	  accordingly optimized code. Use a recent GCC with specific Atom
+	  support in order to fully benefit from selecting this option.
+
 config MCORE2
-	bool "Core 2/newer Xeon"
+	bool "Intel Core 2"
 	---help---
 
 	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
@@ -260,14 +318,63 @@
 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
 	  (not a typo)
 
-config MATOM
-	bool "Intel Atom"
+	  Enables -march=core2
+
+config MNEHALEM
+	bool "Intel Nehalem"
 	---help---
 
-	  Select this for the Intel Atom platform. Intel Atom CPUs have an
-	  in-order pipelining architecture and thus can benefit from
-	  accordingly optimized code. Use a recent GCC with specific Atom
-	  support in order to fully benefit from selecting this option.
+	  Select this for 1st Gen Core processors in the Nehalem family.
+
+	  Enables -march=nehalem
+
+config MWESTMERE
+	bool "Intel Westmere"
+	---help---
+
+	  Select this for the Intel Westmere formerly Nehalem-C family.
+
+	  Enables -march=westmere
+
+config MSILVERMONT
+	bool "Intel Silvermont"
+	---help---
+
+	  Select this for the Intel Silvermont platform.
+
+	  Enables -march=silvermont
+
+config MSANDYBRIDGE
+	bool "Intel Sandy Bridge"
+	---help---
+
+	  Select this for 2nd Gen Core processors in the Sandy Bridge family.
+
+	  Enables -march=sandybridge
+
+config MIVYBRIDGE
+	bool "Intel Ivy Bridge"
+	---help---
+
+	  Select this for 3rd Gen Core processors in the Ivy Bridge family.
+
+	  Enables -march=ivybridge
+
+config MHASWELL
+	bool "Intel Haswell"
+	---help---
+
+	  Select this for 4th Gen Core processors in the Haswell family.
+
+	  Enables -march=haswell
+
+config MBROADWELL
+	bool "Intel Broadwell"
+	---help---
+
+	  Select this for 5th Gen Core processors in the Broadwell family.
+
+	  Enables -march=broadwell
 
 config GENERIC_CPU
 	bool "Generic-x86-64"
@@ -276,6 +383,19 @@
 	  Generic x86-64 CPU.
 	  Run equally well on all x86-64 CPUs.
 
+config MNATIVE
+ bool "Native optimizations autodetected by GCC"
+ ---help---
+
+   GCC 4.2 and above support -march=native, which automatically detects
+   the optimum settings to use based on your processor. -march=native 
+   also detects and applies additional settings beyond -march specific
+   to your CPU, (eg. -msse4). Unless you have a specific reason not to
+   (e.g. distcc cross-compiling), you should probably be using
+   -march=native rather than anything listed below.
+
+   Enables -march=native
+
 endchoice
 
 config X86_GENERIC
@@ -290,6 +410,16 @@
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
+config X86_MARCH_NATIVE
+	bool "Use -march=native cflag (EXPERIMENTAL)"
+	help
+	  Setting Y here will result in passing the -march=native and
+	  -mtune=native cflags to GCC while compiling the kernel, which
+	  makes GCC check the CPU capabilities and use the best cflags
+	  for your computer.
+
+	  Set Y here only if you use >=gcc-4.2.0.
+
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -300,7 +430,7 @@
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
@@ -331,11 +461,11 @@
 
 config X86_INTEL_USERCOPY
 	def_bool y
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MATOM || MNATIVE
 
 config X86_USE_3DNOW
 	def_bool y
@@ -359,17 +489,17 @@
 
 config X86_TSC
 	def_bool y
-	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
+	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM) || X86_64
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+	depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
+	depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff -Nur linux-4.2/arch/x86/Makefile linux-4.2-pck/arch/x86/Makefile
--- linux-4.2/arch/x86/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/Makefile	2015-09-08 00:50:52.507860491 -0300
@@ -94,13 +94,35 @@
 	KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
 
         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+        cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+        cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
+        cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
+        cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
+        cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
+        cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
+        cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
+        cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 
         cflags-$(CONFIG_MCORE2) += \
-                $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-	cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
-		$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+                $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
+        cflags-$(CONFIG_MNEHALEM) += \
+                $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem))
+        cflags-$(CONFIG_MWESTMERE) += \
+                $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere))
+        cflags-$(CONFIG_MSILVERMONT) += \
+                $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont))
+        cflags-$(CONFIG_MSANDYBRIDGE) += \
+                $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge))
+        cflags-$(CONFIG_MIVYBRIDGE) += \
+                $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge))
+        cflags-$(CONFIG_MHASWELL) += \
+                $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
+        cflags-$(CONFIG_MBROADWELL) += \
+                $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
+        cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
+                $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
         KBUILD_CFLAGS += $(cflags-y)
 
diff -Nur linux-4.2/arch/x86/Makefile_32.cpu linux-4.2-pck/arch/x86/Makefile_32.cpu
--- linux-4.2/arch/x86/Makefile_32.cpu	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/Makefile_32.cpu	2015-09-08 00:50:52.507860491 -0300
@@ -10,6 +10,9 @@
 endif
 
 align := $(cc-option-align)
+ifeq ($(CONFIG_X86_MARCH_NATIVE),y)
+cflags-y			+= -march=native
+else
 cflags-$(CONFIG_M486)		+= -march=i486
 cflags-$(CONFIG_M586)		+= -march=i586
 cflags-$(CONFIG_M586TSC)	+= -march=i586
@@ -23,7 +26,15 @@
 # Please note, that patches that add -march=athlon-xp and friends are pointless.
 # They make zero difference whatsosever to performance at this time.
 cflags-$(CONFIG_MK7)		+= -march=athlon
+cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
 cflags-$(CONFIG_MK8)		+= $(call cc-option,-march=k8,-march=athlon)
+cflags-$(CONFIG_MK8SSE3)		+= $(call cc-option,-march=k8-sse3,-march=athlon)
+cflags-$(CONFIG_MK10)	+= $(call cc-option,-march=amdfam10,-march=athlon)
+cflags-$(CONFIG_MBARCELONA)	+= $(call cc-option,-march=barcelona,-march=athlon)
+cflags-$(CONFIG_MBOBCAT)	+= $(call cc-option,-march=btver1,-march=athlon)
+cflags-$(CONFIG_MBULLDOZER)	+= $(call cc-option,-march=bdver1,-march=athlon)
+cflags-$(CONFIG_MPILEDRIVER)	+= $(call cc-option,-march=bdver2,-march=athlon)
+cflags-$(CONFIG_MJAGUAR)	+= $(call cc-option,-march=btver2,-march=athlon)
 cflags-$(CONFIG_MCRUSOE)	+= -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MEFFICEON)	+= -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MWINCHIPC6)	+= $(call cc-option,-march=winchip-c6,-march=i586)
@@ -32,8 +43,15 @@
 cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_MVIAC7)		+= -march=i686
 cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
-cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
-	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+cflags-$(CONFIG_MNEHALEM)	+= -march=i686 $(call tune,nehalem)
+cflags-$(CONFIG_MWESTMERE)	+= -march=i686 $(call tune,westmere)
+cflags-$(CONFIG_MSILVERMONT)	+= -march=i686 $(call tune,silvermont)
+cflags-$(CONFIG_MSANDYBRIDGE)	+= -march=i686 $(call tune,sandybridge)
+cflags-$(CONFIG_MIVYBRIDGE)	+= -march=i686 $(call tune,ivybridge)
+cflags-$(CONFIG_MHASWELL)	+= -march=i686 $(call tune,haswell)
+cflags-$(CONFIG_MBROADWELL)	+= -march=i686 $(call tune,broadwell)
+cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
+	$(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
 cflags-$(CONFIG_MELAN)		+= -march=i486
@@ -41,6 +59,8 @@
 # Geode GX1 support
 cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
 cflags-$(CONFIG_MGEODE_LX)	+= $(call cc-option,-march=geode,-march=pentium-mmx)
+endif
+
 # add at the end to overwrite eventual tuning options from earlier
 # cpu entries
 cflags-$(CONFIG_X86_GENERIC) 	+= $(call tune,generic,$(call tune,i686))
diff -Nur linux-4.2/arch/x86/include/asm/module.h linux-4.2-pck/arch/x86/include/asm/module.h
--- linux-4.2/arch/x86/include/asm/module.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/include/asm/module.h	2015-09-08 00:50:52.507860491 -0300
@@ -15,6 +15,22 @@
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MNATIVE
+#define MODULE_PROC_FAMILY "NATIVE "
+#elif defined CONFIG_MNEHALEM
+#define MODULE_PROC_FAMILY "NEHALEM "
+#elif defined CONFIG_MWESTMERE
+#define MODULE_PROC_FAMILY "WESTMERE "
+#elif defined CONFIG_MSILVERMONT
+#define MODULE_PROC_FAMILY "SILVERMONT "
+#elif defined CONFIG_MSANDYBRIDGE
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
+#elif defined CONFIG_MIVYBRIDGE
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
+#elif defined CONFIG_MHASWELL
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
 #elif defined CONFIG_MATOM
 #define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
@@ -33,6 +49,20 @@
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MK8SSE3
+#define MODULE_PROC_FAMILY "K8SSE3 "
+#elif defined CONFIG_MK10
+#define MODULE_PROC_FAMILY "K10 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
 #elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
diff -Nur linux-4.2/arch/x86/kernel/early_printk.c linux-4.2-pck/arch/x86/kernel/early_printk.c
--- linux-4.2/arch/x86/kernel/early_printk.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/kernel/early_printk.c	2015-09-08 00:48:22.215031180 -0300
@@ -27,7 +27,8 @@
 static int max_ypos = 25, max_xpos = 80;
 static int current_ypos = 25, current_xpos;
 
-static void early_vga_write(struct console *con, const char *str, unsigned n)
+static void early_vga_write(struct console *con, const char *str, unsigned n,
+                            unsigned int loglevel)
 {
 	char c;
 	int  i, k, j;
@@ -118,7 +119,8 @@
 	return timeout ? 0 : -1;
 }
 
-static void early_serial_write(struct console *con, const char *s, unsigned n)
+static void early_serial_write(struct console *con, const char *s, unsigned n,
+                               unsigned int loglevel)
 {
 	while (*s && n-- > 0) {
 		if (*s == '\n')
diff -Nur linux-4.2/arch/x86/kernel/espfix_64.c linux-4.2-pck/arch/x86/kernel/espfix_64.c
--- linux-4.2/arch/x86/kernel/espfix_64.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/kernel/espfix_64.c	2015-09-08 11:20:31.877040782 -0300
@@ -173,6 +173,7 @@
 		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
 
 		pmd_p = (pmd_t *)page_address(page);
+                SetPageTOI_Untracked(virt_to_page(pmd_p));
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -185,6 +186,7 @@
 		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
 
 		pte_p = (pte_t *)page_address(page);
+                SetPageTOI_Untracked(virt_to_page(pte_p));
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -193,6 +195,7 @@
 
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
+        SetPageTOI_Untracked(virt_to_page(stack_page));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
diff -Nur linux-4.2/arch/x86/kernel/tsc.c linux-4.2-pck/arch/x86/kernel/tsc.c
--- linux-4.2/arch/x86/kernel/tsc.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/kernel/tsc.c	2015-09-08 11:20:31.883707121 -0300
@@ -12,6 +12,7 @@
 #include <linux/percpu.h>
 #include <linux/timex.h>
 #include <linux/static_key.h>
+#include <linux/mm.h>
 
 #include <asm/hpet.h>
 #include <asm/timer.h>
@@ -195,6 +196,10 @@
 
 	c2n->head = c2n->data;
 	c2n->tail = c2n->data;
+
+        // Don't let TuxOnIce make data RO - a secondary CPU will cause a triple fault
+        // if it loads microcode, which then does a printk, which may end up invoking cycles_2_ns
+        SetPageTOI_Untracked(virt_to_page(c2n));
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
diff -Nur linux-4.2/arch/x86/mm/fault.c linux-4.2-pck/arch/x86/mm/fault.c
--- linux-4.2/arch/x86/mm/fault.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/mm/fault.c	2015-09-08 11:20:31.893706633 -0300
@@ -13,6 +13,7 @@
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
 #include <linux/prefetch.h>		/* prefetchw			*/
 #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
+#include <linux/tuxonice.h>             /* incremental image support    */
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -652,6 +653,10 @@
 	unsigned long flags;
 	int sig;
 
+        if (toi_make_writable(init_mm.pgd, address)) {
+            return;
+        }
+
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs)) {
 		/*
@@ -906,10 +911,101 @@
 	}
 }
 
+#ifdef CONFIG_TOI_INCREMENTAL
+/**
+ * toi_do_cbw - Do a copy-before-write before letting the faulting process continue
+ */
+static void toi_do_cbw(struct page *page)
+{
+    struct toi_cbw_state *state = this_cpu_ptr(&toi_cbw_states);
+
+    state->active = 1;
+    wmb();
+
+    if (state->enabled && state->next && PageTOI_CBW(page)) {
+        struct toi_cbw *this = state->next;
+        memcpy(this->virt, page_address(page), PAGE_SIZE);
+        this->pfn = page_to_pfn(page);
+        state->next = this->next;
+    }
+
+    state->active = 0;
+}
+
+/**
+ * _toi_make_writable - Defuse TOI's write protection
+ */
+int _toi_make_writable(pte_t *pte)
+{
+    struct page *page = pte_page(*pte);
+    if (PageTOI_RO(page)) {
+        pgd_t *pgd = __va(read_cr3());
+        /*
+         * If this is a TuxOnIce caused fault, we may not have permission to
+         * write to a page needed to reset the permissions of the original
+         * page. Use swapper_pg_dir to get around this.
+         */
+        load_cr3(swapper_pg_dir);
+
+        set_pte_atomic(pte, pte_mkwrite(*pte));
+        SetPageTOI_Dirty(page);
+        ClearPageTOI_RO(page);
+
+        toi_do_cbw(page);
+
+        load_cr3(pgd);
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * toi_make_writable - Handle a (potential) fault caused by TOI's write protection
+ *
+ * Make a page writable that was protected. Might be because of a fault, or
+ * because we're allocating it and want it to be untracked.
+ *
+ * Note that in the fault handling case, we don't care about the error code. If
+ * called from the double fault handler, we won't have one. We just check to
+ * see if the page was made RO by TOI, and mark it dirty/release the protection
+ * if it was.
+ */
+int toi_make_writable(pgd_t *pgd, unsigned long address)
+{
+    pud_t *pud;
+    pmd_t *pmd;
+    pte_t *pte;
+
+    pgd = pgd + pgd_index(address);
+    if (!pgd_present(*pgd))
+        return 0;
+
+    pud = pud_offset(pgd, address);
+    if (!pud_present(*pud))
+        return 0;
+
+    if (pud_large(*pud))
+        return _toi_make_writable((pte_t *) pud);
+
+    pmd = pmd_offset(pud, address);
+    if (!pmd_present(*pmd))
+        return 0;
+
+    if (pmd_large(*pmd))
+        return _toi_make_writable((pte_t *) pmd);
+
+    pte = pte_offset_kernel(pmd, address);
+    if (!pte_present(*pte))
+        return 0;
+
+    return _toi_make_writable(pte);
+}
+#endif
+
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
 	if ((error_code & PF_WRITE) && !pte_write(*pte))
-		return 0;
+                return 0;
 
 	if ((error_code & PF_INSTR) && !pte_exec(*pte))
 		return 0;
@@ -1072,6 +1168,15 @@
 		kmemcheck_hide(regs);
 	prefetchw(&mm->mmap_sem);
 
+        /*
+         * Detect and handle page faults due to TuxOnIce making pages read-only
+         * so that it can create incremental images.
+         *
+         * Do it early to avoid double faults.
+         */
+        if (unlikely(toi_make_writable(init_mm.pgd, address)))
+            return;
+
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
diff -Nur linux-4.2/arch/x86/mm/init.c linux-4.2-pck/arch/x86/mm/init.c
--- linux-4.2/arch/x86/mm/init.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/mm/init.c	2015-09-08 11:20:31.897039804 -0300
@@ -147,9 +147,10 @@
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_TOI_INCREMENTAL)
 	/*
-	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+         * For CONFIG_DEBUG_PAGEALLOC or TuxOnIce's incremental image support,
+         * identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
diff -Nur linux-4.2/arch/x86/platform/efi/early_printk.c linux-4.2-pck/arch/x86/platform/efi/early_printk.c
--- linux-4.2/arch/x86/platform/efi/early_printk.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/arch/x86/platform/efi/early_printk.c	2015-09-08 00:48:22.215031180 -0300
@@ -124,7 +124,8 @@
 }
 
 static void
-early_efi_write(struct console *con, const char *str, unsigned int num)
+early_efi_write(struct console *con, const char *str, unsigned int num,
+		unsigned int loglevel)
 {
 	struct screen_info *si;
 	unsigned int len;
diff -Nur linux-4.2/block/Kconfig.iosched linux-4.2-pck/block/Kconfig.iosched
--- linux-4.2/block/Kconfig.iosched	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/block/Kconfig.iosched	2015-09-08 00:48:22.215031180 -0300
@@ -39,9 +39,30 @@
 	---help---
 	  Enable group IO scheduling in CFQ.
 
+config IOSCHED_BFQ
+	tristate "BFQ I/O scheduler"
+	default y
+	---help---
+	  The BFQ I/O scheduler tries to distribute bandwidth among
+	  all processes according to their weights.
+	  It aims at distributing the bandwidth as desired, independently of
+	  the disk parameters and with any workload. It also tries to
+	  guarantee low latency to interactive and soft real-time
+	  applications. If compiled built-in (saying Y here), BFQ can
+	  be configured to support hierarchical scheduling.
+
+config CGROUP_BFQIO
+	bool "BFQ hierarchical scheduling support"
+	depends on CGROUPS && IOSCHED_BFQ=y
+	default n
+	---help---
+	  Enable hierarchical scheduling in BFQ, using the cgroups
+	  filesystem interface.  The name of the subsystem will be
+	  bfqio.
+
 choice
 	prompt "Default I/O scheduler"
-	default DEFAULT_CFQ
+	default DEFAULT_BFQ
 	help
 	  Select the I/O scheduler which will be used by default for all
 	  block devices.
@@ -52,6 +73,16 @@
 	config DEFAULT_CFQ
 		bool "CFQ" if IOSCHED_CFQ=y
 
+	config DEFAULT_BFQ
+		bool "BFQ" if IOSCHED_BFQ=y
+		help
+		  Selects BFQ as the default I/O scheduler which will be
+		  used by default for all block devices.
+		  The BFQ I/O scheduler aims at distributing the bandwidth
+		  as desired, independently of the disk parameters and with
+		  any workload. It also tries to guarantee low latency to
+		  interactive and soft real-time applications.
+
 	config DEFAULT_NOOP
 		bool "No-op"
 
@@ -61,6 +92,7 @@
 	string
 	default "deadline" if DEFAULT_DEADLINE
 	default "cfq" if DEFAULT_CFQ
+	default "bfq" if DEFAULT_BFQ
 	default "noop" if DEFAULT_NOOP
 
 endmenu
diff -Nur linux-4.2/block/Makefile linux-4.2-pck/block/Makefile
--- linux-4.2/block/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/block/Makefile	2015-09-08 11:20:31.933704681 -0300
@@ -7,7 +7,7 @@
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
 			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
-			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
+			uuid.o genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
 			partitions/
 
 obj-$(CONFIG_BOUNCE)	+= bounce.o
@@ -18,6 +18,7 @@
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
+obj-$(CONFIG_IOSCHED_BFQ)	+= bfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
diff -Nur linux-4.2/block/bfq-cgroup.c linux-4.2-pck/block/bfq-cgroup.c
--- linux-4.2/block/bfq-cgroup.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/bfq-cgroup.c	2015-09-08 00:48:22.215031180 -0300
@@ -0,0 +1,936 @@
+/*
+ * BFQ: CGROUPS support.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ */
+
+#ifdef CONFIG_CGROUP_BFQIO
+
+static DEFINE_MUTEX(bfqio_mutex);
+
+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
+{
+	return bgrp ? !bgrp->online : false;
+}
+
+static struct bfqio_cgroup bfqio_root_cgroup = {
+	.weight = BFQ_DEFAULT_GRP_WEIGHT,
+	.ioprio = BFQ_DEFAULT_GRP_IOPRIO,
+	.ioprio_class = BFQ_DEFAULT_GRP_CLASS,
+};
+
+static inline void bfq_init_entity(struct bfq_entity *entity,
+				   struct bfq_group *bfqg)
+{
+	entity->weight = entity->new_weight;
+	entity->orig_weight = entity->new_weight;
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class;
+	entity->parent = bfqg->my_entity;
+	entity->sched_data = &bfqg->sched_data;
+}
+
+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
+}
+
+/*
+ * Search the hash table (currently just a list) of bgrp for the bfq_group
+ * associated with bfqd.  Must be called under rcu_read_lock().
+ */
+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
+					    struct bfq_data *bfqd)
+{
+	struct bfq_group *bfqg;
+	void *key;
+
+	hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
+		key = rcu_dereference(bfqg->bfqd);
+		if (key == bfqd)
+			return bfqg;
+	}
+
+	return NULL;
+}
+
+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
+					 struct bfq_group *bfqg)
+{
+	struct bfq_entity *entity = &bfqg->entity;
+
+	/*
+	 * If the weight of the entity has never been set via the cgroup
+	 * interface, then bgrp->weight == 0. In this case we initialize
+	 * the weight from the current ioprio value. Otherwise, the group
+	 * weight, if set, takes priority over the ioprio value.
+	 */
+	if (bgrp->weight == 0) {
+		entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
+		entity->new_ioprio = bgrp->ioprio;
+	} else {
+		if (bgrp->weight < BFQ_MIN_WEIGHT ||
+		    bgrp->weight > BFQ_MAX_WEIGHT) {
+			printk(KERN_CRIT "bfq_group_init_entity: "
+					 "bgrp->weight %d\n", bgrp->weight);
+			BUG();
+		}
+		entity->new_weight = bgrp->weight;
+		entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
+	}
+	entity->orig_weight = entity->weight = entity->new_weight;
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
+	entity->my_sched_data = &bfqg->sched_data;
+	bfqg->active_entities = 0;
+}
+
+static inline void bfq_group_set_parent(struct bfq_group *bfqg,
+					struct bfq_group *parent)
+{
+	struct bfq_entity *entity;
+
+	BUG_ON(parent == NULL);
+	BUG_ON(bfqg == NULL);
+
+	entity = &bfqg->entity;
+	entity->parent = parent->my_entity;
+	entity->sched_data = &parent->sched_data;
+}
+
+/**
+ * bfq_group_chain_alloc - allocate a chain of groups.
+ * @bfqd: queue descriptor.
+ * @css: the leaf cgroup_subsys_state this chain starts from.
+ *
+ * Allocate a chain of groups starting from the one belonging to
+ * @css up to the root cgroup.  Stop if a cgroup on the chain
+ * to the root already has a group allocated on @bfqd.
+ */
+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
+					       struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp;
+	struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
+
+	for (; css != NULL; css = css->parent) {
+		bgrp = css_to_bfqio(css);
+
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		if (bfqg != NULL) {
+			/*
+			 * All the cgroups in the path from there to the
+			 * root must have a bfq_group for bfqd, so we don't
+			 * need any more allocations.
+			 */
+			break;
+		}
+
+		bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
+		if (bfqg == NULL)
+			goto cleanup;
+
+		bfq_group_init_entity(bgrp, bfqg);
+		bfqg->my_entity = &bfqg->entity;
+
+		if (leaf == NULL) {
+			leaf = bfqg;
+			prev = leaf;
+		} else {
+			bfq_group_set_parent(prev, bfqg);
+			/*
+			 * Build a list of allocated nodes using the bfqd
+			 * field, which is still unused and will be
+			 * initialized only after the node is
+			 * connected.
+			 */
+			prev->bfqd = bfqg;
+			prev = bfqg;
+		}
+	}
+
+	return leaf;
+
+cleanup:
+	while (leaf != NULL) {
+		prev = leaf;
+		leaf = leaf->bfqd;
+		kfree(prev);
+	}
+
+	return NULL;
+}
+
+/**
+ * bfq_group_chain_link - link an allocated group chain to a cgroup
+ *                        hierarchy.
+ * @bfqd: the queue descriptor.
+ * @css: the leaf cgroup_subsys_state to start from.
+ * @leaf: the leaf group (to be associated with @css).
+ *
+ * Try to link a chain of groups to a cgroup hierarchy, connecting the
+ * nodes bottom-up, so we can be sure that when we find a cgroup in the
+ * hierarchy that already has a group associated with @bfqd, all the nodes
+ * in the path to the root cgroup have one too.
+ *
+ * On locking: the queue lock protects the hierarchy (there is a hierarchy
+ * per device) while the bfqio_cgroup lock protects the list of groups
+ * belonging to the same cgroup.
+ */
+static void bfq_group_chain_link(struct bfq_data *bfqd,
+				 struct cgroup_subsys_state *css,
+				 struct bfq_group *leaf)
+{
+	struct bfqio_cgroup *bgrp;
+	struct bfq_group *bfqg, *next, *prev = NULL;
+	unsigned long flags;
+
+	assert_spin_locked(bfqd->queue->queue_lock);
+
+	for (; css != NULL && leaf != NULL; css = css->parent) {
+		bgrp = css_to_bfqio(css);
+		next = leaf->bfqd;
+
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		BUG_ON(bfqg != NULL);
+
+		spin_lock_irqsave(&bgrp->lock, flags);
+
+		rcu_assign_pointer(leaf->bfqd, bfqd);
+		hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
+		hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
+
+		spin_unlock_irqrestore(&bgrp->lock, flags);
+
+		prev = leaf;
+		leaf = next;
+	}
+
+	BUG_ON(css == NULL && leaf != NULL);
+	if (css != NULL && prev != NULL) {
+		bgrp = css_to_bfqio(css);
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		bfq_group_set_parent(prev, bfqg);
+	}
+}
+
+/**
+ * bfq_find_alloc_group - return the group associated with @bfqd in @css.
+ * @bfqd: queue descriptor.
+ * @css: the cgroup_subsys_state being searched.
+ *
+ * Return a group associated with @bfqd in @css, allocating one if
+ * necessary.  When a group is returned, all the cgroups in the path
+ * to the root have a group associated with @bfqd.
+ *
+ * If the allocation fails, return the root group: this breaks guarantees
+ * but is a safe fallback.  If this loss becomes a problem it can be
+ * mitigated using the equivalent weight (given by the product of the
+ * weights of the groups in the path from @group to the root) in the
+ * root scheduler.
+ *
+ * We allocate all the missing nodes in the path from the leaf cgroup
+ * to the root and we connect the nodes only after all the allocations
+ * have been successful.
+ */
+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+					      struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+	struct bfq_group *bfqg;
+
+	bfqg = bfqio_lookup_group(bgrp, bfqd);
+	if (bfqg != NULL)
+		return bfqg;
+
+	bfqg = bfq_group_chain_alloc(bfqd, css);
+	if (bfqg != NULL)
+		bfq_group_chain_link(bfqd, css, bfqg);
+	else
+		bfqg = bfqd->root_group;
+
+	return bfqg;
+}
+
+/**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+ * @bfqq: the queue to move.
+ * @entity: @bfqq's entity.
+ * @bfqg: the group to move to.
+ *
+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
+ * it on the new one.  Avoid putting the entity on the old group idle tree.
+ *
+ * Must be called under the queue lock; the cgroup owning @bfqg must
+ * not disappear (for now this just means that we are called under
+ * rcu_read_lock()).
+ */
+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_entity *entity, struct bfq_group *bfqg)
+{
+	int busy, resume;
+
+	busy = bfq_bfqq_busy(bfqq);
+	resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
+
+	BUG_ON(resume && !entity->on_st);
+	BUG_ON(busy && !resume && entity->on_st &&
+	       bfqq != bfqd->in_service_queue);
+
+	if (busy) {
+		BUG_ON(atomic_read(&bfqq->ref) < 2);
+
+		if (!resume)
+			bfq_del_bfqq_busy(bfqd, bfqq, 0);
+		else
+			bfq_deactivate_bfqq(bfqd, bfqq, 0);
+	} else if (entity->on_st)
+		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+
+	/*
+	 * Here we use a reference to bfqg.  We don't need a refcounter
+	 * as the cgroup reference will not be dropped, so that its
+	 * destroy() callback will not be invoked.
+	 */
+	entity->parent = bfqg->my_entity;
+	entity->sched_data = &bfqg->sched_data;
+
+	if (busy && resume)
+		bfq_activate_bfqq(bfqd, bfqq);
+
+	if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
+		bfq_schedule_dispatch(bfqd);
+}
+
+/**
+ * __bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bfqd: the queue descriptor.
+ * @bic: the bic to move.
+ * @css: the cgroup_subsys_state of the cgroup to move to.
+ *
+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
+ * has to make sure that the reference to cgroup is valid across the call.
+ *
+ * NOTE: an alternative approach might have been to store the current
+ * cgroup in bfqq and to get a reference to it, reducing the lookup
+ * time here, at the price of slightly more complex code.
+ */
+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+						struct bfq_io_cq *bic,
+						struct cgroup_subsys_state *css)
+{
+	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
+	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
+	struct bfq_entity *entity;
+	struct bfq_group *bfqg;
+	struct bfqio_cgroup *bgrp;
+
+	bgrp = css_to_bfqio(css);
+
+	bfqg = bfq_find_alloc_group(bfqd, css);
+	if (async_bfqq != NULL) {
+		entity = &async_bfqq->entity;
+
+		if (entity->sched_data != &bfqg->sched_data) {
+			bic_set_bfqq(bic, NULL, 0);
+			bfq_log_bfqq(bfqd, async_bfqq,
+				     "bic_change_group: %p %d",
+				     async_bfqq, atomic_read(&async_bfqq->ref));
+			bfq_put_queue(async_bfqq);
+		}
+	}
+
+	if (sync_bfqq != NULL) {
+		entity = &sync_bfqq->entity;
+		if (entity->sched_data != &bfqg->sched_data)
+			bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
+	}
+
+	return bfqg;
+}
+
+/**
+ * bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bic: the bic being migrated.
+ * @css: the cgroup_subsys_state of the destination cgroup.
+ *
+ * When the task owning @bic is moved to @cgroup, @bic is immediately
+ * moved into its new parent group.
+ */
+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
+				  struct cgroup_subsys_state *css)
+{
+	struct bfq_data *bfqd;
+	unsigned long uninitialized_var(flags);
+
+	bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+				   &flags);
+	if (bfqd != NULL) {
+		__bfq_bic_change_cgroup(bfqd, bic, css);
+		bfq_put_bfqd_unlock(bfqd, &flags);
+	}
+}
+
+/**
+ * bfq_bic_update_cgroup - update the cgroup of @bic.
+ * @bic: the @bic to update.
+ *
+ * Make sure that @bic is enqueued in the cgroup of the current task.
+ * We need this in addition to moving bics during the cgroup attach
+ * phase because the task owning @bic could be at its first disk
+ * access, or we may have ended up in the root cgroup as the result of
+ * a memory allocation failure; in that case we try here to move @bic
+ * to the right group.
+ *
+ * Must be called under the queue lock.  It is safe to use the returned
+ * value even after the rcu_read_unlock() as the migration/destruction
+ * paths act under the queue lock too.  IOW it is impossible to race with
+ * group migration/destruction and end up with an invalid group as:
+ *   a) here cgroup has not yet been destroyed, nor its destroy callback
+ *      has started execution, as current holds a reference to it,
+ *   b) if it is destroyed after rcu_read_unlock() [after current is
+ *      migrated to a different cgroup] its attach() callback will have
+ *      taken care of removing all the references to the old cgroup data.
+ */
+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
+{
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+	struct bfq_group *bfqg;
+	struct cgroup_subsys_state *css;
+
+	BUG_ON(bfqd == NULL);
+
+	rcu_read_lock();
+	css = task_css(current, bfqio_cgrp_id);
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
+	rcu_read_unlock();
+
+	return bfqg;
+}
+
+/**
+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
+ * @st: the service tree being flushed.
+ */
+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
+{
+	struct bfq_entity *entity = st->first_idle;
+
+	for (; entity != NULL; entity = st->first_idle)
+		__bfq_deactivate_entity(entity, 0);
+}
+
+/**
+ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
+ * @bfqd: the device data structure with the root group.
+ * @entity: the entity to move.
+ */
+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+					    struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	BUG_ON(bfqq == NULL);
+	bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
+	return;
+}
+
+/**
+ * bfq_reparent_active_entities - move to the root group all active
+ *                                entities.
+ * @bfqd: the device data structure with the root group.
+ * @bfqg: the group to move from.
+ * @st: the service tree with the entities.
+ *
+ * Needs queue_lock to be taken and reference to be valid over the call.
+ */
+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
+						struct bfq_group *bfqg,
+						struct bfq_service_tree *st)
+{
+	struct rb_root *active = &st->active;
+	struct bfq_entity *entity = NULL;
+
+	if (!RB_EMPTY_ROOT(&st->active))
+		entity = bfq_entity_of(rb_first(active));
+
+	for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
+		bfq_reparent_leaf_entity(bfqd, entity);
+
+	if (bfqg->sched_data.in_service_entity != NULL)
+		bfq_reparent_leaf_entity(bfqd,
+			bfqg->sched_data.in_service_entity);
+
+	return;
+}
+
+/**
+ * bfq_destroy_group - destroy @bfqg.
+ * @bgrp: the bfqio_cgroup containing @bfqg.
+ * @bfqg: the group being destroyed.
+ *
+ * Destroy @bfqg, making sure that it is not referenced from its parent.
+ */
+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
+{
+	struct bfq_data *bfqd;
+	struct bfq_service_tree *st;
+	struct bfq_entity *entity = bfqg->my_entity;
+	unsigned long uninitialized_var(flags);
+	int i;
+
+	hlist_del(&bfqg->group_node);
+
+	/*
+	 * Empty all service_trees belonging to this group before
+	 * deactivating the group itself.
+	 */
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
+		st = bfqg->sched_data.service_tree + i;
+
+		/*
+		 * The idle tree may still contain bfq_queues belonging
+		 * to exited tasks because they never migrated to a different
+		 * cgroup from the one being destroyed now.  No one else
+		 * can access them so it's safe to act without any lock.
+		 */
+		bfq_flush_idle_tree(st);
+
+		/*
+		 * It may happen that some queues are still active
+		 * (busy) upon group destruction (if the corresponding
+		 * processes have been forced to terminate). We move
+		 * all the leaf entities corresponding to these queues
+		 * to the root_group.
+		 * Also, it may happen that the group has an entity
+		 * in service, which is disconnected from the active
+		 * tree: it must be moved, too.
+		 * There is no need to put the sync queues, as the
+		 * scheduler has taken no reference.
+		 */
+		bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+		if (bfqd != NULL) {
+			bfq_reparent_active_entities(bfqd, bfqg, st);
+			bfq_put_bfqd_unlock(bfqd, &flags);
+		}
+		BUG_ON(!RB_EMPTY_ROOT(&st->active));
+		BUG_ON(!RB_EMPTY_ROOT(&st->idle));
+	}
+	BUG_ON(bfqg->sched_data.next_in_service != NULL);
+	BUG_ON(bfqg->sched_data.in_service_entity != NULL);
+
+	/*
+	 * We may race with device destruction, take extra care when
+	 * dereferencing bfqg->bfqd.
+	 */
+	bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+	if (bfqd != NULL) {
+		hlist_del(&bfqg->bfqd_node);
+		__bfq_deactivate_entity(entity, 0);
+		bfq_put_async_queues(bfqd, bfqg);
+		bfq_put_bfqd_unlock(bfqd, &flags);
+	}
+	BUG_ON(entity->tree != NULL);
+
+	/*
+	 * No need to defer the kfree() to the end of the RCU grace
+	 * period: we are called from the destroy() callback of our
+	 * cgroup, so we can be sure that no one is a) still using
+	 * this cgroup or b) doing lookups in it.
+	 */
+	kfree(bfqg);
+}
+
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	struct hlist_node *tmp;
+	struct bfq_group *bfqg;
+
+	hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node)
+		bfq_end_wr_async_queues(bfqd, bfqg);
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+/**
+ * bfq_disconnect_groups - disconnect @bfqd from all its groups.
+ * @bfqd: the device descriptor being exited.
+ *
+ * When the device exits we just make sure that no lookup can return
+ * the now unused group structures.  They will be deallocated on cgroup
+ * destruction.
+ */
+static void bfq_disconnect_groups(struct bfq_data *bfqd)
+{
+	struct hlist_node *tmp;
+	struct bfq_group *bfqg;
+
+	bfq_log(bfqd, "disconnect_groups beginning");
+	hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) {
+		hlist_del(&bfqg->bfqd_node);
+
+		__bfq_deactivate_entity(bfqg->my_entity, 0);
+
+		/*
+		 * Don't remove from the group hash, just set an
+		 * invalid key.  No lookups can race with the
+		 * assignment as bfqd is being destroyed; this
+		 * implies also that new elements cannot be added
+		 * to the list.
+		 */
+		rcu_assign_pointer(bfqg->bfqd, NULL);
+
+		bfq_log(bfqd, "disconnect_groups: put async for group %p",
+			bfqg);
+		bfq_put_async_queues(bfqd, bfqg);
+	}
+}
+
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
+{
+	struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
+	struct bfq_group *bfqg = bfqd->root_group;
+
+	bfq_put_async_queues(bfqd, bfqg);
+
+	spin_lock_irq(&bgrp->lock);
+	hlist_del_rcu(&bfqg->group_node);
+	spin_unlock_irq(&bgrp->lock);
+
+	/*
+	 * No need to synchronize_rcu() here: since the device is gone
+	 * there cannot be any read-side access to its root_group.
+	 */
+	kfree(bfqg);
+}
+
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
+{
+	struct bfq_group *bfqg;
+	struct bfqio_cgroup *bgrp;
+	int i;
+
+	bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
+	if (bfqg == NULL)
+		return NULL;
+
+	bfqg->entity.parent = NULL;
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+
+	bgrp = &bfqio_root_cgroup;
+	spin_lock_irq(&bgrp->lock);
+	rcu_assign_pointer(bfqg->bfqd, bfqd);
+	hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
+	spin_unlock_irq(&bgrp->lock);
+
+	return bfqg;
+}
+
+#define SHOW_FUNCTION(__VAR)						\
+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \
+				       struct cftype *cftype)		\
+{									\
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);			\
+	u64 ret = -ENODEV;						\
+									\
+	mutex_lock(&bfqio_mutex);					\
+	if (bfqio_is_removed(bgrp))					\
+		goto out_unlock;					\
+									\
+	spin_lock_irq(&bgrp->lock);					\
+	ret = bgrp->__VAR;						\
+	spin_unlock_irq(&bgrp->lock);					\
+									\
+out_unlock:								\
+	mutex_unlock(&bfqio_mutex);					\
+	return ret;							\
+}
+
+SHOW_FUNCTION(weight);
+SHOW_FUNCTION(ioprio);
+SHOW_FUNCTION(ioprio_class);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__VAR, __MIN, __MAX)				\
+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
+					struct cftype *cftype,		\
+					u64 val)			\
+{									\
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);			\
+	struct bfq_group *bfqg;						\
+	int ret = -EINVAL;						\
+									\
+	if (val < (__MIN) || val > (__MAX))				\
+		return ret;						\
+									\
+	ret = -ENODEV;							\
+	mutex_lock(&bfqio_mutex);					\
+	if (bfqio_is_removed(bgrp))					\
+		goto out_unlock;					\
+	ret = 0;							\
+									\
+	spin_lock_irq(&bgrp->lock);					\
+	bgrp->__VAR = (unsigned short)val;				\
+	hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) {	\
+		/*							\
+		 * Setting the ioprio_changed flag of the entity        \
+		 * to 1 with new_##__VAR == ##__VAR would re-set        \
+		 * the value of the weight to its ioprio mapping.       \
+		 * Set the flag only if necessary.			\
+		 */							\
+		if ((unsigned short)val != bfqg->entity.new_##__VAR) {  \
+			bfqg->entity.new_##__VAR = (unsigned short)val; \
+			/*						\
+			 * Make sure that the above new value has been	\
+			 * stored in bfqg->entity.new_##__VAR before	\
+			 * setting the ioprio_changed flag. In fact,	\
+			 * this flag may be read asynchronously (in	\
+			 * critical sections protected by a different	\
+			 * lock than that held here), and finding this	\
+			 * flag set may cause the execution of the code	\
+			 * for updating parameters whose value may	\
+			 * depend also on bfqg->entity.new_##__VAR (in	\
+			 * __bfq_entity_update_weight_prio).		\
+			 * This barrier makes sure that the new value	\
+			 * of bfqg->entity.new_##__VAR is correctly	\
+			 * seen in that code.				\
+			 */						\
+			smp_wmb();                                      \
+			bfqg->entity.ioprio_changed = 1;                \
+		}							\
+	}								\
+	spin_unlock_irq(&bgrp->lock);					\
+									\
+out_unlock:								\
+	mutex_unlock(&bfqio_mutex);					\
+	return ret;							\
+}
+
+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
+#undef STORE_FUNCTION
+
+static struct cftype bfqio_files[] = {
+	{
+		.name = "weight",
+		.read_u64 = bfqio_cgroup_weight_read,
+		.write_u64 = bfqio_cgroup_weight_write,
+	},
+	{
+		.name = "ioprio",
+		.read_u64 = bfqio_cgroup_ioprio_read,
+		.write_u64 = bfqio_cgroup_ioprio_write,
+	},
+	{
+		.name = "ioprio_class",
+		.read_u64 = bfqio_cgroup_ioprio_class_read,
+		.write_u64 = bfqio_cgroup_ioprio_class_write,
+	},
+	{ },	/* terminate */
+};
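+
+/*
+ * With the legacy cgroup hierarchy, the cftypes above are exposed as
+ * per-cgroup files named bfqio.weight, bfqio.ioprio and bfqio.ioprio_class.
+ * Minimal usage sketch (the mount point and the example weight are
+ * assumptions, not requirements of the code):
+ *
+ *   mkdir /sys/fs/cgroup/bfqio/slow
+ *   echo 100  > /sys/fs/cgroup/bfqio/slow/bfqio.weight
+ *   echo $PID > /sys/fs/cgroup/bfqio/slow/tasks
+ *
+ * Writes outside the ranges accepted by the STORE_FUNCTION helpers above
+ * are rejected with -EINVAL.
+ */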
+
+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
+						*parent_css)
+{
+	struct bfqio_cgroup *bgrp;
+
+	if (parent_css != NULL) {
+		bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
+		if (bgrp == NULL)
+			return ERR_PTR(-ENOMEM);
+	} else
+		bgrp = &bfqio_root_cgroup;
+
+	spin_lock_init(&bgrp->lock);
+	INIT_HLIST_HEAD(&bgrp->group_data);
+	bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
+	bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
+
+	return &bgrp->css;
+}
+
+/*
+ * We cannot support shared io contexts, as we have no means to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main bic/bfqq data structures.  For now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc; the drawback of this
+ * behavior is that a group containing a task that forked using CLONE_IO
+ * will not be destroyed until the tasks sharing the ioc die.
+ */
+static int bfqio_can_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct io_context *ioc;
+	int ret = 0;
+
+	cgroup_taskset_for_each(task, tset) {
+		/*
+		 * task_lock() is needed to avoid races with
+		 * exit_io_context()
+		 */
+		task_lock(task);
+		ioc = task->io_context;
+		if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
+			/*
+			 * ioc == NULL means that the task is either too
+			 * young or exiting: if it still has no ioc, the
+			 * ioc can't be shared; if the task is exiting, the
+			 * attach will fail anyway, no matter what we
+			 * return here.
+			 */
+			ret = -EINVAL;
+		task_unlock(task);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void bfqio_attach(struct cgroup_subsys_state *css,
+			 struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct io_context *ioc;
+	struct io_cq *icq;
+
+	/*
+	 * IMPORTANT NOTE: The move of more than one process at a time to a
+	 * new group has not yet been tested.
+	 */
+	cgroup_taskset_for_each(task, tset) {
+		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+		if (ioc) {
+			/*
+			 * Handle cgroup change here.
+			 */
+			rcu_read_lock();
+			hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
+				if (!strncmp(
+					icq->q->elevator->type->elevator_name,
+					"bfq", ELV_NAME_MAX))
+					bfq_bic_change_cgroup(icq_to_bic(icq),
+							      css);
+			rcu_read_unlock();
+			put_io_context(ioc);
+		}
+	}
+}
+
+static void bfqio_destroy(struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+	struct hlist_node *tmp;
+	struct bfq_group *bfqg;
+
+	/*
+	 * Since we are destroying the cgroup, there are no more tasks
+	 * referencing it, and all the RCU grace periods that may have
+	 * referenced it have ended (as the destruction of the parent
+	 * cgroup is RCU-safe); bgrp->group_data will not be accessed by
+	 * anything else and we don't need any synchronization.
+	 */
+	hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
+		bfq_destroy_group(bgrp, bfqg);
+
+	BUG_ON(!hlist_empty(&bgrp->group_data));
+
+	kfree(bgrp);
+}
+
+static int bfqio_css_online(struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+
+	mutex_lock(&bfqio_mutex);
+	bgrp->online = true;
+	mutex_unlock(&bfqio_mutex);
+
+	return 0;
+}
+
+static void bfqio_css_offline(struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+
+	mutex_lock(&bfqio_mutex);
+	bgrp->online = false;
+	mutex_unlock(&bfqio_mutex);
+}
+
+struct cgroup_subsys bfqio_cgrp_subsys = {
+	.css_alloc = bfqio_create,
+	.css_online = bfqio_css_online,
+	.css_offline = bfqio_css_offline,
+	.can_attach = bfqio_can_attach,
+	.attach = bfqio_attach,
+	.css_free = bfqio_destroy,
+	.legacy_cftypes = bfqio_files,
+};
+#else
+static inline void bfq_init_entity(struct bfq_entity *entity,
+				   struct bfq_group *bfqg)
+{
+	entity->weight = entity->new_weight;
+	entity->orig_weight = entity->new_weight;
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class;
+	entity->sched_data = &bfqg->sched_data;
+}
+
+static inline struct bfq_group *
+bfq_bic_update_cgroup(struct bfq_io_cq *bic)
+{
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+	return bfqd->root_group;
+}
+
+static inline void bfq_bfqq_move(struct bfq_data *bfqd,
+				 struct bfq_queue *bfqq,
+				 struct bfq_entity *entity,
+				 struct bfq_group *bfqg)
+{
+}
+
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
+{
+	bfq_put_async_queues(bfqd, bfqd->root_group);
+}
+
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
+{
+	kfree(bfqd->root_group);
+}
+
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
+{
+	struct bfq_group *bfqg;
+	int i;
+
+	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
+	if (bfqg == NULL)
+		return NULL;
+
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+
+	return bfqg;
+}
+#endif
diff -Nur linux-4.2/block/bfq-ioc.c linux-4.2-pck/block/bfq-ioc.c
--- linux-4.2/block/bfq-ioc.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/bfq-ioc.c	2015-09-08 00:48:22.215031180 -0300
@@ -0,0 +1,36 @@
+/*
+ * BFQ: I/O context handling.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ */
+
+/**
+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
+ * @icq: the iocontext queue.
+ */
+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
+{
+	/* bic->icq is the first member, %NULL will convert to %NULL */
+	return container_of(icq, struct bfq_io_cq, icq);
+}
+
+/**
+ * bfq_bic_lookup - search @ioc for a bic associated with @bfqd.
+ * @bfqd: the lookup key.
+ * @ioc: the io_context of the process doing I/O.
+ *
+ * Queue lock must be held.
+ */
+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+					       struct io_context *ioc)
+{
+	if (ioc)
+		return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
+	return NULL;
+}
diff -Nur linux-4.2/block/bfq-iosched.c linux-4.2-pck/block/bfq-iosched.c
--- linux-4.2/block/bfq-iosched.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/bfq-iosched.c	2015-09-08 00:48:22.218364355 -0300
@@ -0,0 +1,4218 @@
+/*
+ * Budget Fair Queueing (BFQ) disk scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based on
+ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
+ * measured in number of sectors, to processes instead of time slices. The
+ * device is not granted to the in-service process for a given time slice,
+ * but until it has exhausted its assigned budget. This change from the time
+ * to the service domain allows BFQ to distribute the device throughput
+ * among processes as desired, without any distortion due to ZBR, workload
+ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
+ * called B-WF2Q+, to schedule processes according to their budgets. More
+ * precisely, BFQ schedules queues associated to processes. Thanks to the
+ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
+ * I/O-bound processes issuing sequential requests (to boost the
+ * throughput), and yet guarantee a low latency to interactive and soft
+ * real-time applications.
+ *
+ * BFQ is described in [1], which also contains a reference to the initial,
+ * more theoretical paper on BFQ. In the latter paper the interested reader
+ * can find full details on the main algorithm, as well as
+ * formulas of the guarantees and formal proofs of all the properties.
+ * With respect to the version of BFQ presented in these papers, this
+ * implementation adds a few more heuristics, such as the one that
+ * guarantees a low latency to soft real-time applications, and a
+ * hierarchical extension based on H-WF2Q+.
+ *
+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
+ * complexity derives from the one introduced with EEVDF in [3].
+ *
+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
+ *     with the BFQ Disk I/O Scheduler'',
+ *     Proceedings of the 5th Annual International Systems and Storage
+ *     Conference (SYSTOR '12), June 2012.
+ *
+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
+ *
+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
+ *     Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
+ *     Oct 1997.
+ *
+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
+ *
+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
+ *     First: A Flexible and Accurate Mechanism for Proportional Share
+ *     Resource Allocation,'' technical report.
+ *
+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/cgroup.h>
+#include <linux/elevator.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include "bfq.h"
+#include "blk.h"
+
+/* Expiration time of sync (0) and async (1) requests, in jiffies. */
+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+
+/* Maximum backwards seek, in KiB. */
+static const int bfq_back_max = 16 * 1024;
+
+/* Penalty of a backwards seek, in number of sectors. */
+static const int bfq_back_penalty = 2;
+
+/* Idling period duration, in jiffies. */
+static int bfq_slice_idle = HZ / 125;
+
+/* Default maximum budget values, in sectors and number of requests. */
+static const int bfq_default_max_budget = 16 * 1024;
+static const int bfq_max_budget_async_rq = 4;
+
+/*
+ * Async to sync throughput distribution is controlled as follows:
+ * when an async request is served, the entity is charged the number
+ * of sectors of the request, multiplied by the factor below
+ */
+static const int bfq_async_charge_factor = 10;
+
+/* Default timeout values, in jiffies, approximating CFQ defaults. */
+static const int bfq_timeout_sync = HZ / 8;
+static int bfq_timeout_async = HZ / 25;
+
+struct kmem_cache *bfq_pool;
+
+/* Below this threshold (in ms), we consider thinktime immediate. */
+#define BFQ_MIN_TT		2
+
+/* hw_tag detection: parallel requests threshold and min samples needed. */
+#define BFQ_HW_QUEUE_THRESHOLD	4
+#define BFQ_HW_QUEUE_SAMPLES	32
+
+#define BFQQ_SEEK_THR	 (sector_t)(8 * 1024)
+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
+
+/* Min samples used for peak rate estimation (for autotuning). */
+#define BFQ_PEAK_RATE_SAMPLES	32
+
+/* Shift used for peak rate fixed precision calculations. */
+#define BFQ_RATE_SHIFT		16
+
+/*
+ * By default, BFQ computes the duration of the weight raising for
+ * interactive applications automatically, using the following formula:
+ * duration = (R / r) * T, where r is the peak rate of the device, and
+ * R and T are two reference parameters.
+ * In particular, R is the peak rate of the reference device (see below),
+ * and T is a reference time: given the systems that are likely to be
+ * installed on the reference device according to its speed class, T is
+ * about the maximum time needed, under BFQ and while reading two files in
+ * parallel, to load typical large applications on these systems.
+ * In practice, the slower/faster the device at hand is, the more/less it
+ * takes to load applications with respect to the reference device.
+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive
+ * applications.
+ *
+ * BFQ uses four different reference pairs (R, T), depending on:
+ * . whether the device is rotational or non-rotational;
+ * . whether the device is slow, such as old or portable HDDs, as well as
+ *   SD cards, or fast, such as newer HDDs and SSDs.
+ *
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0]
+ * are the reference values for a slow/fast rotational device, whereas
+ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for
+ * a slow/fast non-rotational device. Finally, device_speed_thresh are the
+ * thresholds used to switch between speed classes.
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+static int R_slow[2] = {1536, 10752};
+static int R_fast[2] = {17415, 34791};
+/*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+ * function.
+ */
+static int T_slow[2];
+static int T_fast[2];
+static int device_speed_thresh[2];
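+
+/*
+ * Worked example for the formula above (the numbers are illustrative, not
+ * measurements): if the estimated peak rate r of the device at hand equals
+ * the reference rate R of its speed class, the weight-raising duration is
+ * exactly T; a device twice as fast (r = 2R) gets T/2, one half as fast
+ * gets 2T. bfq_wr_duration() below computes this as RT_prod / peak_rate,
+ * with RT_prod assumed to hold the precomputed product R * T for the
+ * detected speed class.
+ */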
+
+#define BFQ_SERVICE_TREE_INIT	((struct bfq_service_tree)		\
+				{ RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
+
+#define RQ_BIC(rq)		((struct bfq_io_cq *) (rq)->elv.priv[0])
+#define RQ_BFQQ(rq)		((rq)->elv.priv[1])
+
+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd);
+
+#include "bfq-ioc.c"
+#include "bfq-sched.c"
+#include "bfq-cgroup.c"
+
+#define bfq_class_idle(bfqq)	((bfqq)->entity.ioprio_class ==\
+				 IOPRIO_CLASS_IDLE)
+#define bfq_class_rt(bfqq)	((bfqq)->entity.ioprio_class ==\
+				 IOPRIO_CLASS_RT)
+
+#define bfq_sample_valid(samples)	((samples) > 80)
+
+/*
+ * The following macro groups conditions that need to be evaluated when
+ * checking if existing queues and groups form a symmetric scenario
+ * and therefore idling can be reduced or disabled for some of the
+ * queues. See the comment to the function bfq_bfqq_must_not_expire()
+ * for further details.
+ */
+#ifdef CONFIG_CGROUP_BFQIO
+#define symmetric_scenario	  (!bfqd->active_numerous_groups && \
+				   !bfq_differentiated_weights(bfqd))
+#else
+#define symmetric_scenario	  (!bfq_differentiated_weights(bfqd))
+#endif
+
+/*
+ * We regard a request as SYNC, if either it's a read or has the SYNC bit
+ * set (in which case it could also be a direct WRITE).
+ */
+static inline int bfq_bio_sync(struct bio *bio)
+{
+	if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Scheduler run of queue, if there are requests pending and no one in the
+ * driver that will restart queueing.
+ */
+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
+{
+	if (bfqd->queued != 0) {
+		bfq_log(bfqd, "schedule dispatch");
+		kblockd_schedule_work(&bfqd->unplug_work);
+	}
+}
+
+/*
+ * Lifted from AS - choose which of rq1 and rq2 is best served now.
+ * We choose the request that is closest to the head right now.  Distance
+ * behind the head is penalized and only allowed to a certain extent.
+ */
+static struct request *bfq_choose_req(struct bfq_data *bfqd,
+				      struct request *rq1,
+				      struct request *rq2,
+				      sector_t last)
+{
+	sector_t s1, s2, d1 = 0, d2 = 0;
+	unsigned long back_max;
+#define BFQ_RQ1_WRAP	0x01 /* request 1 wraps */
+#define BFQ_RQ2_WRAP	0x02 /* request 2 wraps */
+	unsigned wrap = 0; /* bit mask: requests behind the disk head? */
+
+	if (rq1 == NULL || rq1 == rq2)
+		return rq2;
+	if (rq2 == NULL)
+		return rq1;
+
+	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+		return rq1;
+	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+		return rq2;
+	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
+		return rq1;
+	else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
+		return rq2;
+
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
+
+	/*
+	 * By definition, 1KiB is 2 sectors.
+	 */
+	back_max = bfqd->bfq_back_max * 2;
+
+	/*
+	 * Strict one way elevator _except_ in the case where we allow
+	 * short backward seeks which are biased as twice the cost of a
+	 * similar forward seek.
+	 */
+	if (s1 >= last)
+		d1 = s1 - last;
+	else if (s1 + back_max >= last)
+		d1 = (last - s1) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ1_WRAP;
+
+	if (s2 >= last)
+		d2 = s2 - last;
+	else if (s2 + back_max >= last)
+		d2 = (last - s2) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ2_WRAP;
+
+	/* Found required data */
+
+	/*
+	 * By doing switch() on the bit mask "wrap" we avoid having to
+	 * check two variables for all permutations: --> faster!
+	 */
+	switch (wrap) {
+	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
+		if (d1 < d2)
+			return rq1;
+		else if (d2 < d1)
+			return rq2;
+		else {
+			if (s1 >= s2)
+				return rq1;
+			else
+				return rq2;
+		}
+
+	case BFQ_RQ2_WRAP:
+		return rq1;
+	case BFQ_RQ1_WRAP:
+		return rq2;
+	case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
+	default:
+		/*
+		 * Since both rqs are wrapped,
+		 * start with the one that's further behind head
+		 * (--> only *one* back seek required),
+		 * since back seek takes more time than forward.
+		 */
+		if (s1 <= s2)
+			return rq1;
+		else
+			return rq2;
+	}
+}
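+
+/*
+ * Illustrative example of the distance computation above (assuming the
+ * default bfq_back_max and bfq_back_penalty): with the head at sector 1000,
+ * a request at sector 1200 gets d = 200, while a request at sector 900,
+ * being 100 sectors behind the head, is charged d = 100 * 2 = 200; the tie
+ * is then broken in favour of the request with the higher sector, i.e. the
+ * forward one.
+ */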
+
+static struct bfq_queue *
+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
+		     sector_t sector, struct rb_node **ret_parent,
+		     struct rb_node ***rb_link)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *bfqq = NULL;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+
+		/*
+		 * Sort strictly based on sector. Smallest to the left,
+		 * largest to the right.
+		 */
+		if (sector > blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_right;
+		else if (sector < blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_left;
+		else
+			break;
+		p = n;
+		bfqq = NULL;
+	}
+
+	*ret_parent = parent;
+	if (rb_link)
+		*rb_link = p;
+
+	bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
+		(long long unsigned)sector,
+		bfqq != NULL ? bfqq->pid : 0);
+
+	return bfqq;
+}
+
+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *__bfqq;
+
+	if (bfqq->pos_root != NULL) {
+		rb_erase(&bfqq->pos_node, bfqq->pos_root);
+		bfqq->pos_root = NULL;
+	}
+
+	if (bfq_class_idle(bfqq))
+		return;
+	if (!bfqq->next_rq)
+		return;
+
+	bfqq->pos_root = &bfqd->rq_pos_tree;
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
+			blk_rq_pos(bfqq->next_rq), &parent, &p);
+	if (__bfqq == NULL) {
+		rb_link_node(&bfqq->pos_node, parent, p);
+		rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
+	} else
+		bfqq->pos_root = NULL;
+}
+
+/*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+static inline bool bfq_differentiated_weights(struct bfq_data *bfqd)
+{
+	/*
+	 * For weights to differ, at least one of the trees must contain
+	 * at least two nodes.
+	 */
+	return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+		(bfqd->queue_weights_tree.rb_node->rb_left ||
+		 bfqd->queue_weights_tree.rb_node->rb_right)
+#ifdef CONFIG_CGROUP_BFQIO
+	       ) ||
+	       (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
+		(bfqd->group_weights_tree.rb_node->rb_left ||
+		 bfqd->group_weights_tree.rb_node->rb_right)
+#endif
+	       );
+}
+
+/*
+ * If the weight-counter tree passed as input contains no counter for
+ * the weight of the input entity, then add that counter; otherwise just
+ * increment the existing counter.
+ *
+ * Note that weight-counter trees contain few nodes in mostly symmetric
+ * scenarios. For example, if all queues have the same weight, then the
+ * weight-counter tree for the queues may contain at most one node.
+ * This holds even if low_latency is on, because weight-raised queues
+ * are not inserted in the tree.
+ * In most scenarios, the rate at which nodes are created/destroyed
+ * should be low too.
+ */
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/*
+	 * Do not insert if the entity is already associated with a
+	 * counter, which happens if:
+	 *   1) the entity is associated with a queue,
+	 *   2) a request arrival has caused the queue to become both
+	 *      non-weight-raised, and hence change its weight, and
+	 *      backlogged; in this respect, each of the two events
+	 *      causes an invocation of this function,
+	 *   3) this is the invocation of this function caused by the
+	 *      second event. This second invocation is actually useless,
+	 *      and we handle this fact by exiting immediately. More
+	 *      efficient or clearer solutions might possibly be adopted.
+	 */
+	if (entity->weight_counter)
+		return;
+
+	while (*new) {
+		struct bfq_weight_counter *__counter = container_of(*new,
+						struct bfq_weight_counter,
+						weights_node);
+		parent = *new;
+
+		if (entity->weight == __counter->weight) {
+			entity->weight_counter = __counter;
+			goto inc_counter;
+		}
+		if (entity->weight < __counter->weight)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
+					 GFP_ATOMIC);
+	if (!entity->weight_counter)
+		return; /* allocation failed: leave the entity uncounted */
+	entity->weight_counter->weight = entity->weight;
+	rb_link_node(&entity->weight_counter->weights_node, parent, new);
+	rb_insert_color(&entity->weight_counter->weights_node, root);
+
+inc_counter:
+	entity->weight_counter->num_active++;
+}
+
+/*
+ * Decrement the weight counter associated with the entity, and, if the
+ * counter reaches 0, remove the counter from the tree.
+ * See the comments to the function bfq_weights_tree_add() for considerations
+ * about overhead.
+ */
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root)
+{
+	if (!entity->weight_counter)
+		return;
+
+	BUG_ON(RB_EMPTY_ROOT(root));
+	BUG_ON(entity->weight_counter->weight != entity->weight);
+
+	BUG_ON(!entity->weight_counter->num_active);
+	entity->weight_counter->num_active--;
+	if (entity->weight_counter->num_active > 0)
+		goto reset_entity_pointer;
+
+	rb_erase(&entity->weight_counter->weights_node, root);
+	kfree(entity->weight_counter);
+
+reset_entity_pointer:
+	entity->weight_counter = NULL;
+}
+
+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq,
+					struct request *last)
+{
+	struct rb_node *rbnext = rb_next(&last->rb_node);
+	struct rb_node *rbprev = rb_prev(&last->rb_node);
+	struct request *next = NULL, *prev = NULL;
+
+	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
+
+	if (rbprev != NULL)
+		prev = rb_entry_rq(rbprev);
+
+	if (rbnext != NULL)
+		next = rb_entry_rq(rbnext);
+	else {
+		rbnext = rb_first(&bfqq->sort_list);
+		if (rbnext && rbnext != &last->rb_node)
+			next = rb_entry_rq(rbnext);
+	}
+
+	return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
+}
+
+/* see the definition of bfq_async_charge_factor for details */
+static inline unsigned long bfq_serv_to_charge(struct request *rq,
+					       struct bfq_queue *bfqq)
+{
+	return blk_rq_sectors(rq) *
+		(1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
+		bfq_async_charge_factor));
+}
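+
+/*
+ * Worked example (using the default bfq_async_charge_factor of 10): an
+ * async request of 64 sectors issued by a queue that is not being
+ * weight-raised (wr_coeff == 1) is charged 64 * (1 + 10) = 704 sectors of
+ * budget, whereas the same request from a sync queue, or from a
+ * weight-raised queue, is charged its actual size, 64 sectors.
+ */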
+
+/**
+ * bfq_updated_next_req - update the queue after a new next_rq selection.
+ * @bfqd: the device data the queue belongs to.
+ * @bfqq: the queue to update.
+ *
+ * If the first request of a queue changes we make sure that the queue
+ * has enough budget to serve at least its first request (if the
+ * request has grown).  We do this because if the queue does not have enough
+ * budget for its first request, it has to go through two dispatch
+ * rounds to actually get it dispatched.
+ */
+static void bfq_updated_next_req(struct bfq_data *bfqd,
+				 struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	struct request *next_rq = bfqq->next_rq;
+	unsigned long new_budget;
+
+	if (next_rq == NULL)
+		return;
+
+	if (bfqq == bfqd->in_service_queue)
+		/*
+		 * In order not to break guarantees, budgets cannot be
+		 * changed after an entity has been selected.
+		 */
+		return;
+
+	BUG_ON(entity->tree != &st->active);
+	BUG_ON(entity == entity->sched_data->in_service_entity);
+
+	new_budget = max_t(unsigned long, bfqq->max_budget,
+			   bfq_serv_to_charge(next_rq, bfqq));
+	if (entity->budget != new_budget) {
+		entity->budget = new_budget;
+		bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
+					 new_budget);
+		bfq_activate_bfqq(bfqd, bfqq);
+	}
+}
+
+static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+	u64 dur;
+
+	if (bfqd->bfq_wr_max_time > 0)
+		return bfqd->bfq_wr_max_time;
+
+	dur = bfqd->RT_prod;
+	do_div(dur, bfqd->peak_rate);
+
+	return dur;
+}
+
+static inline unsigned
+bfq_bfqq_cooperations(struct bfq_queue *bfqq)
+{
+	return bfqq->bic ? bfqq->bic->cooperations : 0;
+}
+
+static inline void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+	if (bic->saved_idle_window)
+		bfq_mark_bfqq_idle_window(bfqq);
+	else
+		bfq_clear_bfqq_idle_window(bfqq);
+	if (bic->saved_IO_bound)
+		bfq_mark_bfqq_IO_bound(bfqq);
+	else
+		bfq_clear_bfqq_IO_bound(bfqq);
+	/* Assuming that the flag in_large_burst is already correctly set */
+	if (bic->wr_time_left && bfqq->bfqd->low_latency &&
+	    !bfq_bfqq_in_large_burst(bfqq) &&
+	    bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
+		/*
+		 * Start a weight raising period with the duration given by
+		 * the raising_time_left snapshot.
+		 */
+		if (bfq_bfqq_busy(bfqq))
+			bfqq->bfqd->wr_busy_queues++;
+		bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+		bfqq->wr_cur_max_time = bic->wr_time_left;
+		bfqq->last_wr_start_finish = jiffies;
+		bfqq->entity.ioprio_changed = 1;
+	}
+	/*
+	 * Clear wr_time_left to prevent bfq_bfqq_save_state() from
+	 * getting confused about the queue's need of a weight-raising
+	 * period.
+	 */
+	bic->wr_time_left = 0;
+}
+
+/* Must be called with the queue_lock held. */
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+	int process_refs, io_refs;
+
+	io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+	process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
+	BUG_ON(process_refs < 0);
+	return process_refs;
+}
+
+/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
+static inline void bfq_reset_burst_list(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq)
+{
+	struct bfq_queue *item;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
+		hlist_del_init(&item->burst_list_node);
+	hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+	bfqd->burst_size = 1;
+}
+
+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/* Increment burst size to take into account also bfqq */
+	bfqd->burst_size++;
+
+	if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+		struct bfq_queue *pos, *bfqq_item;
+		struct hlist_node *n;
+
+		/*
+		 * Enough queues have been activated shortly after each
+		 * other to consider this burst as large.
+		 */
+		bfqd->large_burst = true;
+
+		/*
+		 * We can now mark all queues in the burst list as
+		 * belonging to a large burst.
+		 */
+		hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+				     burst_list_node)
+		        bfq_mark_bfqq_in_large_burst(bfqq_item);
+		bfq_mark_bfqq_in_large_burst(bfqq);
+
+		/*
+		 * From now on, and until the current burst finishes, any
+		 * new queue being activated shortly after the last queue
+		 * was inserted in the burst can be immediately marked as
+		 * belonging to a large burst. So the burst list is not
+		 * needed any more. Remove it.
+		 */
+		hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+					  burst_list_node)
+			hlist_del_init(&pos->burst_list_node);
+	} else /* burst not yet large: add bfqq to the burst list */
+		hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+}
+
+/*
+ * If many queues happen to become active shortly after each other, then,
+ * to help the processes associated to these queues get their job done as
+ * soon as possible, it is usually better to not grant either weight-raising
+ * or device idling to these queues. In this comment we describe, firstly,
+ * the reasons why this fact holds, and, secondly, the next function, which
+ * implements the main steps needed to properly mark these queues so that
+ * they can then be treated in a different way.
+ *
+ * As for the terminology, we say that a queue becomes active, i.e.,
+ * switches from idle to backlogged, either when it is created (as a
+ * consequence of the arrival of an I/O request), or, if already existing,
+ * when a new request for the queue arrives while the queue is idle.
+ * Bursts of activations, i.e., activations of different queues occurring
+ * shortly after each other, are typically caused by services or applications
+ * that spawn or reactivate many parallel threads/processes. Examples are
+ * systemd during boot or git grep.
+ *
+ * These services or applications benefit mostly from a high throughput:
+ * the quicker the requests of the activated queues are cumulatively served,
+ * the sooner the target job of these queues gets completed. As a consequence,
+ * weight-raising any of these queues, which also implies idling the device
+ * for it, is almost always counterproductive: in most cases it just lowers
+ * throughput.
+ *
+ * On the other hand, a burst of activations may also be caused by the start
+ * of an application that does not consist of a lot of parallel I/O-bound
+ * threads. In fact, with a complex application, the burst may be just a
+ * consequence of the fact that several processes need to be executed to
+ * start up the application. To start an application as quickly as possible,
+ * the best thing to do is to privilege the I/O related to the application
+ * with respect to all other I/O. Therefore, the best strategy to start as
+ * quickly as possible an application that causes a burst of activations is
+ * to weight-raise all the queues activated during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the two
+ * types of bursts need to be distinguished. Fortunately, this seems
+ * relatively easy to do, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that bursts with a larger size
+ * than that threshold are apparently caused only by services or commands
+ * such as systemd or git grep. For brevity, hereafter we call just 'large'
+ * these bursts. BFQ *does not* weight-raise queues whose activations occur
+ * in a large burst. In addition, for each of these queues BFQ performs or
+ * does not perform idling depending on which choice boosts the throughput
+ * most. The exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Turning back to the next function, it implements all the steps needed
+ * to detect the occurrence of a large burst and to properly mark all the
+ * queues belonging to it (so that they can then be treated in a different
+ * way). This goal is achieved by maintaining a special "burst list" that
+ * holds, temporarily, the queues that belong to the burst in progress. The
+ * list is then used to mark these queues as belonging to a large burst if
+ * the burst does become large. The main steps are the following.
+ *
+ * . when the very first queue is activated, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *     . all the queues in the burst list are marked as belonging to a
+ *       large burst
+ *
+ *     . the burst list is deleted; in fact, the burst list already served
+ *       its purpose (temporarily keeping track of the queues in a burst,
+ *       so as to be able to mark them as belonging to a large burst in the
+ *       previous sub-step), and now is not needed any more
+ *
+ *     . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is activated while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is activated a while
+ *   later, i.e., not shortly after the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed as finished and:
+ *
+ *        . the large-burst mode is reset if set
+ *
+ *        . the burst list is emptied
+ *
+ *        . Q is inserted in the burst list, as Q may be the first queue
+ *          in a possible new burst (then the burst list contains just Q
+ *          after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			     bool idle_for_long_time)
+{
+	/*
+	 * If bfqq happened to be activated in a burst, but has been idle
+	 * for at least as long as an interactive queue, then we assume
+	 * that, in the overall I/O initiated in the burst, the I/O
+	 * associated to bfqq is finished. So bfqq does not need to be
+	 * treated as a queue belonging to a burst anymore. Accordingly,
+	 * we reset bfqq's in_large_burst flag if set, and remove bfqq
+	 * from the burst list if it's there. We do not, however, decrement
+	 * burst_size, because the fact that bfqq does not need to belong
+	 * to the burst list any more does not invalidate the fact that
+	 * bfqq may have been activated during the current burst.
+	 */
+	if (idle_for_long_time) {
+		hlist_del_init(&bfqq->burst_list_node);
+		bfq_clear_bfqq_in_large_burst(bfqq);
+	}
+
+	/*
+	 * If bfqq is already in the burst list or is part of a large
+	 * burst, then there is nothing else to do.
+	 */
+	if (!hlist_unhashed(&bfqq->burst_list_node) ||
+	    bfq_bfqq_in_large_burst(bfqq))
+		return;
+
+	/*
+	 * If bfqq's activation happens late enough, then the current
+	 * burst is finished, and related data structures must be reset.
+	 *
+	 * In this respect, consider the special case where bfqq is the very
+	 * first queue being activated. In this case, last_ins_in_burst is
+	 * not yet significant when we get here. But it is easy to verify
+	 * that, whether or not the following condition is true, bfqq will
+	 * end up being inserted into the burst list. In particular the
+	 * list will happen to contain only bfqq. And this is exactly what
+	 * has to happen, as bfqq may be the first queue in a possible
+	 * burst.
+	 */
+	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+	    bfqd->bfq_burst_interval)) {
+		bfqd->large_burst = false;
+		bfq_reset_burst_list(bfqd, bfqq);
+		return;
+	}
+
+	/*
+	 * If we get here, then bfqq is being activated shortly after the
+	 * last queue. So, if the current burst is also large, we can mark
+	 * bfqq as belonging to this large burst immediately.
+	 */
+	if (bfqd->large_burst) {
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		return;
+	}
+
+	/*
+	 * If we get here, then a large-burst state has not yet been
+	 * reached, but bfqq is being activated shortly after the last
+	 * queue. Then we add bfqq to the burst.
+	 */
+	bfq_add_to_burst(bfqd, bfqq);
+}
+
+static void bfq_add_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_data *bfqd = bfqq->bfqd;
+	struct request *next_rq, *prev;
+	unsigned long old_wr_coeff = bfqq->wr_coeff;
+	bool interactive = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
+	bfqq->queued[rq_is_sync(rq)]++;
+	bfqd->queued++;
+
+	elv_rb_add(&bfqq->sort_list, rq);
+
+	/*
+	 * Check if this request is a better next-serve candidate.
+	 */
+	prev = bfqq->next_rq;
+	next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+	BUG_ON(next_rq == NULL);
+	bfqq->next_rq = next_rq;
+
+	/*
+	 * Adjust priority tree position, if next_rq changes.
+	 */
+	if (prev != bfqq->next_rq)
+		bfq_rq_pos_tree_add(bfqd, bfqq);
+
+	if (!bfq_bfqq_busy(bfqq)) {
+		bool soft_rt, coop_or_in_burst,
+		     idle_for_long_time = time_is_before_jiffies(
+						bfqq->budget_timeout +
+						bfqd->bfq_wr_min_idle_time);
+
+		if (bfq_bfqq_sync(bfqq)) {
+			bool already_in_burst =
+			   !hlist_unhashed(&bfqq->burst_list_node) ||
+			   bfq_bfqq_in_large_burst(bfqq);
+			bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
+			/*
+			 * If bfqq was not already in the current burst,
+			 * then, at this point, bfqq either has been
+			 * added to the current burst or has caused the
+			 * current burst to terminate. In particular, in
+			 * the second case, bfqq has become the first
+			 * queue in a possible new burst.
+			 * In both cases last_ins_in_burst needs to be
+			 * moved forward.
+			 */
+			if (!already_in_burst)
+				bfqd->last_ins_in_burst = jiffies;
+		}
+
+		coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
+			bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
+		soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+			!coop_or_in_burst &&
+			time_is_before_jiffies(bfqq->soft_rt_next_start);
+		interactive = !coop_or_in_burst && idle_for_long_time;
+		entity->budget = max_t(unsigned long, bfqq->max_budget,
+				       bfq_serv_to_charge(next_rq, bfqq));
+
+		if (!bfq_bfqq_IO_bound(bfqq)) {
+			if (time_before(jiffies,
+					RQ_BIC(rq)->ttime.last_end_request +
+					bfqd->bfq_slice_idle)) {
+				bfqq->requests_within_timer++;
+				if (bfqq->requests_within_timer >=
+				    bfqd->bfq_requests_within_timer)
+					bfq_mark_bfqq_IO_bound(bfqq);
+			} else
+				bfqq->requests_within_timer = 0;
+		}
+
+		if (!bfqd->low_latency)
+			goto add_bfqq_busy;
+
+		if (bfq_bfqq_just_split(bfqq))
+			goto set_ioprio_changed;
+
+		/*
+		 * If the queue:
+		 * - is not being boosted,
+		 * - has been idle for enough time,
+		 * - is not a sync queue or is linked to a bfq_io_cq (it is
+		 *   shared by nature, or it is not shared and its requests
+		 *   have not been redirected to a shared queue),
+		 * then start a weight-raising period.
+		 */
+		if (old_wr_coeff == 1 && (interactive || soft_rt) &&
+		    (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			if (interactive)
+				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+			else
+				bfqq->wr_cur_max_time =
+					bfqd->bfq_wr_rt_max_time;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "wrais starting at %lu, rais_max_time %u",
+				     jiffies,
+				     jiffies_to_msecs(bfqq->wr_cur_max_time));
+		} else if (old_wr_coeff > 1) {
+			if (interactive)
+				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+			else if (coop_or_in_burst ||
+				 (bfqq->wr_cur_max_time ==
+				  bfqd->bfq_wr_rt_max_time &&
+				  !soft_rt)) {
+				bfqq->wr_coeff = 1;
+				bfq_log_bfqq(bfqd, bfqq,
+					"wrais ending at %lu, rais_max_time %u",
+					jiffies,
+					jiffies_to_msecs(bfqq->
+						wr_cur_max_time));
+			} else if (time_before(
+					bfqq->last_wr_start_finish +
+					bfqq->wr_cur_max_time,
+					jiffies +
+					bfqd->bfq_wr_rt_max_time) &&
+				   soft_rt) {
+				/*
+				 *
+				 * The remaining weight-raising time is lower
+				 * than bfqd->bfq_wr_rt_max_time, which means
+				 * that the application is enjoying weight
+				 * raising either because it was deemed
+				 * soft-rt in the near past, or because it
+				 * was deemed interactive long ago.
+				 * In both cases, resetting now the current
+				 * remaining weight-raising time for the
+				 * application to the weight-raising duration
+				 * for soft rt applications would not cause any
+				 * latency increase for the application (as the
+				 * new duration would be higher than the
+				 * remaining time).
+				 *
+				 * In addition, the application is now meeting
+				 * the requirements for being deemed soft rt.
+				 * In the end we can correctly and safely
+				 * (re)charge the weight-raising duration for
+				 * the application with the weight-raising
+				 * duration for soft rt applications.
+				 *
+				 * In particular, doing this recharge now, i.e.,
+				 * before the weight-raising period for the
+				 * application finishes, reduces the probability
+				 * of the following negative scenario:
+				 * 1) the weight of a soft rt application is
+				 *    raised at startup (as for any newly
+				 *    created application),
+				 * 2) since the application is not interactive,
+				 *    at a certain time weight-raising is
+				 *    stopped for the application,
+				 * 3) at that time the application happens to
+				 *    still have pending requests, and hence
+				 *    is destined to not have a chance to be
+				 *    deemed soft rt before these requests are
+				 *    completed (see the comments to the
+				 *    function bfq_bfqq_softrt_next_start()
+				 *    for details on soft rt detection),
+				 * 4) these pending requests experience a high
+				 *    latency because the application is not
+				 *    weight-raised while they are pending.
+				 */
+				bfqq->last_wr_start_finish = jiffies;
+				bfqq->wr_cur_max_time =
+					bfqd->bfq_wr_rt_max_time;
+			}
+		}
+set_ioprio_changed:
+		if (old_wr_coeff != bfqq->wr_coeff)
+			entity->ioprio_changed = 1;
+add_bfqq_busy:
+		bfqq->last_idle_bklogged = jiffies;
+		bfqq->service_from_backlogged = 0;
+		bfq_clear_bfqq_softrt_update(bfqq);
+		bfq_add_bfqq_busy(bfqd, bfqq);
+	} else {
+		if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+		    time_is_before_jiffies(
+				bfqq->last_wr_start_finish +
+				bfqd->bfq_wr_min_inter_arr_async)) {
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+			bfqd->wr_busy_queues++;
+			entity->ioprio_changed = 1;
+			bfq_log_bfqq(bfqd, bfqq,
+			    "non-idle wrais starting at %lu, rais_max_time %u",
+			    jiffies,
+			    jiffies_to_msecs(bfqq->wr_cur_max_time));
+		}
+		if (prev != bfqq->next_rq)
+			bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	if (bfqd->low_latency &&
+		(old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+		bfqq->last_wr_start_finish = jiffies;
+}
+
+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
+					  struct bio *bio)
+{
+	struct task_struct *tsk = current;
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq;
+
+	bic = bfq_bic_lookup(bfqd, tsk->io_context);
+	if (bic == NULL)
+		return NULL;
+
+	bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
+	if (bfqq != NULL)
+		return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
+
+	return NULL;
+}
+
+static void bfq_activate_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+
+	bfqd->rq_in_driver++;
+	bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
+	bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
+		(long long unsigned)bfqd->last_position);
+}
+
+static inline void bfq_deactivate_request(struct request_queue *q,
+					  struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+
+	BUG_ON(bfqd->rq_in_driver == 0);
+	bfqd->rq_in_driver--;
+}
+
+static void bfq_remove_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	const int sync = rq_is_sync(rq);
+
+	if (bfqq->next_rq == rq) {
+		bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+		bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	if (rq->queuelist.prev != &rq->queuelist)
+		list_del_init(&rq->queuelist);
+	BUG_ON(bfqq->queued[sync] == 0);
+	bfqq->queued[sync]--;
+	bfqd->queued--;
+	elv_rb_del(&bfqq->sort_list, rq);
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
+			bfq_del_bfqq_busy(bfqd, bfqq, 1);
+		/*
+		 * Remove queue from request-position tree as it is empty.
+		 */
+		if (bfqq->pos_root != NULL) {
+			rb_erase(&bfqq->pos_node, bfqq->pos_root);
+			bfqq->pos_root = NULL;
+		}
+	}
+
+	if (rq->cmd_flags & REQ_META) {
+		BUG_ON(bfqq->meta_pending == 0);
+		bfqq->meta_pending--;
+	}
+}
+
+static int bfq_merge(struct request_queue *q, struct request **req,
+		     struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct request *__rq;
+
+	__rq = bfq_find_rq_fmerge(bfqd, bio);
+	if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
+	}
+
+	return ELEVATOR_NO_MERGE;
+}
+
+static void bfq_merged_request(struct request_queue *q, struct request *req,
+			       int type)
+{
+	if (type == ELEVATOR_FRONT_MERGE &&
+	    rb_prev(&req->rb_node) &&
+	    blk_rq_pos(req) <
+	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
+				    struct request, rb_node))) {
+		struct bfq_queue *bfqq = RQ_BFQQ(req);
+		struct bfq_data *bfqd = bfqq->bfqd;
+		struct request *prev, *next_rq;
+
+		/* Reposition request in its sort_list */
+		elv_rb_del(&bfqq->sort_list, req);
+		elv_rb_add(&bfqq->sort_list, req);
+		/* Choose next request to be served for bfqq */
+		prev = bfqq->next_rq;
+		next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
+					 bfqd->last_position);
+		BUG_ON(next_rq == NULL);
+		bfqq->next_rq = next_rq;
+		/*
+		 * If next_rq changes, update both the queue's budget to
+		 * fit the new request and the queue's position in its
+		 * rq_pos_tree.
+		 */
+		if (prev != bfqq->next_rq) {
+			bfq_updated_next_req(bfqd, bfqq);
+			bfq_rq_pos_tree_add(bfqd, bfqq);
+		}
+	}
+}
+
+static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+				struct request *next)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
+
+	/*
+	 * If next and rq belong to the same bfq_queue and next is older
+	 * than rq, then reposition rq in the fifo (by substituting next
+	 * with rq). Otherwise, if next and rq belong to different
+	 * bfq_queues, never reposition rq: in fact, we would have to
+	 * reposition it with respect to next's position in its own fifo,
+	 * which would most certainly be too expensive with respect to
+	 * the benefits.
+	 */
+	if (bfqq == next_bfqq &&
+	    !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+	    time_before(next->fifo_time, rq->fifo_time)) {
+		list_del_init(&rq->queuelist);
+		list_replace_init(&next->queuelist, &rq->queuelist);
+		rq->fifo_time = next->fifo_time;
+	}
+
+	if (bfqq->next_rq == next)
+		bfqq->next_rq = rq;
+
+	bfq_remove_request(next);
+}
+
+/* Must be called with bfqq != NULL */
+static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+{
+	BUG_ON(bfqq == NULL);
+	if (bfq_bfqq_busy(bfqq))
+		bfqq->bfqd->wr_busy_queues--;
+	bfqq->wr_coeff = 1;
+	bfqq->wr_cur_max_time = 0;
+	/* Trigger a weight change on the next activation of the queue */
+	bfqq->entity.ioprio_changed = 1;
+}
+
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			if (bfqg->async_bfqq[i][j] != NULL)
+				bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
+	if (bfqg->async_idle_bfqq != NULL)
+		bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
+}
+
+static void bfq_end_wr(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+
+	spin_lock_irq(bfqd->queue->queue_lock);
+
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	bfq_end_wr_async(bfqd);
+
+	spin_unlock_irq(bfqd->queue->queue_lock);
+}
+
+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+	if (request)
+		return blk_rq_pos(io_struct);
+	else
+		return ((struct bio *)io_struct)->bi_iter.bi_sector;
+}
+
+static inline sector_t bfq_dist_from(sector_t pos1,
+				     sector_t pos2)
+{
+	if (pos1 >= pos2)
+		return pos1 - pos2;
+	else
+		return pos2 - pos1;
+}
+
+static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
+					 sector_t sector)
+{
+	return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
+	       BFQQ_SEEK_THR;
+}
+
+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
+{
+	struct rb_root *root = &bfqd->rq_pos_tree;
+	struct rb_node *parent, *node;
+	struct bfq_queue *__bfqq;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+	if (__bfqq != NULL)
+		return __bfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector (rq_pos_tree sorted by
+	 * next_request position).
+	 */
+	__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	if (blk_rq_pos(__bfqq->next_rq) < sector)
+		node = rb_next(&__bfqq->pos_node);
+	else
+		node = rb_prev(&__bfqq->pos_node);
+	if (node == NULL)
+		return NULL;
+
+	__bfqq = rb_entry(node, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	return NULL;
+}
+
+/*
+ * bfqd - obvious
+ * cur_bfqq - passed in so that we don't decide that the current queue
+ *            is closely cooperating with itself
+ * sector - used as a reference point to search for a close queue
+ */
+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
+					      struct bfq_queue *cur_bfqq,
+					      sector_t sector)
+{
+	struct bfq_queue *bfqq;
+
+	if (bfq_class_idle(cur_bfqq))
+		return NULL;
+	if (!bfq_bfqq_sync(cur_bfqq))
+		return NULL;
+	if (BFQQ_SEEKY(cur_bfqq))
+		return NULL;
+
+	/* If device has only one backlogged bfq_queue, don't search. */
+	if (bfqd->busy_queues == 1)
+		return NULL;
+
+	/*
+	 * We should notice if some of the queues are cooperating, e.g.
+	 * working closely on the same area of the disk. In that case,
+	 * we can group them together and avoid wasting time idling.
+	 */
+	bfqq = bfqq_close(bfqd, sector);
+	if (bfqq == NULL || bfqq == cur_bfqq)
+		return NULL;
+
+	/*
+	 * Do not merge queues from different bfq_groups.
+	 */
+	if (bfqq->entity.parent != cur_bfqq->entity.parent)
+		return NULL;
+
+	/*
+	 * It only makes sense to merge sync queues.
+	 */
+	if (!bfq_bfqq_sync(bfqq))
+		return NULL;
+	if (BFQQ_SEEKY(bfqq))
+		return NULL;
+
+	/*
+	 * Do not merge queues of different priority classes.
+	 */
+	if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
+		return NULL;
+
+	return bfqq;
+}
+
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	int process_refs, new_process_refs;
+	struct bfq_queue *__bfqq;
+
+	/*
+	 * If there are no process references on the new_bfqq, then it is
+	 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+	 * may have dropped their last reference (not just their last process
+	 * reference).
+	 */
+	if (!bfqq_process_refs(new_bfqq))
+		return NULL;
+
+	/* Avoid a circular list and skip interim queue merges. */
+	while ((__bfqq = new_bfqq->new_bfqq)) {
+		if (__bfqq == bfqq)
+			return NULL;
+		new_bfqq = __bfqq;
+	}
+
+	process_refs = bfqq_process_refs(bfqq);
+	new_process_refs = bfqq_process_refs(new_bfqq);
+	/*
+	 * If the process for the bfqq has gone away, there is no
+	 * sense in merging the queues.
+	 */
+	if (process_refs == 0 || new_process_refs == 0)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+		new_bfqq->pid);
+
+	/*
+	 * Merging is just a redirection: the requests of the process
+	 * owning one of the two queues are redirected to the other queue.
+	 * The latter queue, in its turn, is set as shared if this is the
+	 * first time that the requests of some process are redirected to
+	 * it.
+	 *
+	 * We redirect bfqq to new_bfqq and not the opposite, because we
+	 * are in the context of the process owning bfqq, hence we have
+	 * the io_cq of this process. So we can immediately configure this
+	 * io_cq to redirect the requests of the process to new_bfqq.
+	 *
+	 * NOTE, even if new_bfqq coincides with the in-service queue, the
+	 * io_cq of new_bfqq is not available, because, if the in-service
+	 * queue is shared, bfqd->in_service_bic may not point to the
+	 * io_cq of the in-service queue.
+	 * Redirecting the requests of the process owning bfqq to the
+	 * currently in-service queue is in any case the best option, as
+	 * we feed the in-service queue with new requests close to the
+	 * last request served and, by doing so, hopefully increase the
+	 * throughput.
+	 */
+	bfqq->new_bfqq = new_bfqq;
+	atomic_add(process_refs, &new_bfqq->ref);
+	return new_bfqq;
+}
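+
+/*
+ * Illustrative note on the chain walk in bfq_setup_merge() above: if,
+ * hypothetically, Q1->new_bfqq == Q2 and Q2->new_bfqq == Q3, then a merge
+ * being set up against Q1 is redirected straight to Q3, the last queue in
+ * the chain (the walk bails out, instead, if following the chain would
+ * loop back to the queue being merged).
+ */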
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service queue
+ * or with a close queue among the scheduled queues.
+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate in cooperation: in fact, since
+ * the requests temporarily redirected to the OOM queue could be redirected
+ * again to dedicated queues at any time, the state needed to correctly
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in a critical condition such as an out-of-memory
+ * situation, the benefits of queue merging are likely to be of little
+ * relevance, or even negligible.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		     void *io_struct, bool request)
+{
+	struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
+	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
+		return NULL;
+
+	in_service_bfqq = bfqd->in_service_queue;
+
+	if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
+	    !bfqd->in_service_bic ||
+	    unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+		goto check_scheduled;
+
+	if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
+		goto check_scheduled;
+
+	if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
+		goto check_scheduled;
+
+	if (in_service_bfqq->entity.parent != bfqq->entity.parent)
+		goto check_scheduled;
+
+	if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+	    bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
+		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+		if (new_bfqq != NULL)
+			return new_bfqq; /* Merge with in-service queue */
+	}
+
+	/*
+	 * Check whether there is a cooperator among currently scheduled
+	 * queues. The only thing we need is that the bio/request is not
+	 * NULL, as we need it to establish whether a cooperator exists.
+	 */
+check_scheduled:
+	new_bfqq = bfq_close_cooperator(bfqd, bfqq,
+					bfq_io_struct_pos(io_struct, request));
+	if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq))
+		return bfq_setup_merge(bfqq, new_bfqq);
+
+	return NULL;
+}
+
+static inline void
+bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq->bic == NULL, the queue is already shared or its requests
+	 * have already been redirected to a shared queue; both idle window
+	 * and weight raising state have already been saved. Do nothing.
+	 */
+	if (bfqq->bic == NULL)
+		return;
+	if (bfqq->bic->wr_time_left)
+		/*
+		 * This is the queue of a just-started process, and would
+		 * deserve weight raising: we set wr_time_left to the full
+		 * weight-raising duration to trigger weight-raising when
+		 * and if the queue is split and the first request of the
+		 * queue is enqueued.
+		 */
+		bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
+	else if (bfqq->wr_coeff > 1) {
+		unsigned long wr_duration =
+			jiffies - bfqq->last_wr_start_finish;
+		/*
+		 * It may happen that a queue's weight raising period lasts
+		 * longer than its wr_cur_max_time, as weight raising is
+		 * handled only when a request is enqueued or dispatched (it
+		 * does not use any timer). If the weight raising period is
+		 * about to end, don't save it.
+		 */
+		if (bfqq->wr_cur_max_time <= wr_duration)
+			bfqq->bic->wr_time_left = 0;
+		else
+			bfqq->bic->wr_time_left =
+				bfqq->wr_cur_max_time - wr_duration;
+		/*
+		 * The bfq_queue is becoming shared or the requests of the
+		 * process owning the queue are being redirected to a shared
+		 * queue. Stop the weight raising period of the queue, as in
+		 * both cases it should not be owned by an interactive or
+		 * soft real-time application.
+		 */
+		bfq_bfqq_end_wr(bfqq);
+	} else
+		bfqq->bic->wr_time_left = 0;
+	bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+	bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+	bfqq->bic->cooperations++;
+	bfqq->bic->failed_cooperations = 0;
+}
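+
+/*
+ * Sketch of the wr_time_left bookkeeping in bfq_bfqq_save_state() above
+ * (values are hypothetical): if a queue has enjoyed weight raising for
+ * 2 s of a 5 s wr_cur_max_time when its state is saved, wr_time_left is
+ * set to the remaining 3 s (expressed in jiffies), to be restored when
+ * and if the queue is later split from the shared queue.
+ */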
+
+static inline void
+bfq_get_bic_reference(struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq->bic has a non-NULL value, the bic to which it belongs
+	 * is about to begin using a shared bfq_queue.
+	 */
+	if (bfqq->bic)
+		atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
+}
+
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+		(long unsigned)new_bfqq->pid);
+	/* Save weight raising and idle window of the merged queues */
+	bfq_bfqq_save_state(bfqq);
+	bfq_bfqq_save_state(new_bfqq);
+	if (bfq_bfqq_IO_bound(bfqq))
+		bfq_mark_bfqq_IO_bound(new_bfqq);
+	bfq_clear_bfqq_IO_bound(bfqq);
+	/*
+	 * Grab a reference to the bic, to prevent it from being destroyed
+	 * before being possibly touched by a bfq_split_bfqq().
+	 */
+	bfq_get_bic_reference(bfqq);
+	bfq_get_bic_reference(new_bfqq);
+	/*
+	 * Merge queues (that is, let bic redirect its requests to new_bfqq)
+	 */
+	bic_set_bfqq(bic, new_bfqq, 1);
+	bfq_mark_bfqq_coop(new_bfqq);
+	/*
+	 * new_bfqq now belongs to at least two bics (it is a shared queue):
+	 * set new_bfqq->bic to NULL. bfqq either:
+	 * - does not belong to any bic any more, and hence bfqq->bic must
+	 *   be set to NULL, or
+	 * - is a queue whose owning bics have already been redirected to a
+	 *   different queue, hence the queue is destined to not belong to
+	 *   any bic soon and bfqq->bic is already NULL (therefore the next
+	 *   assignment causes no harm).
+	 */
+	new_bfqq->bic = NULL;
+	bfqq->bic = NULL;
+	bfq_put_queue(bfqq);
+}
+
+static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
+{
+	struct bfq_io_cq *bic = bfqq->bic;
+	struct bfq_data *bfqd = bfqq->bfqd;
+
+	if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
+		bic->failed_cooperations++;
+		if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
+			bic->cooperations = 0;
+	}
+}
+
+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+			   struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq, *new_bfqq;
+
+	/*
+	 * Disallow merge of a sync bio into an async request.
+	 */
+	if (bfq_bio_sync(bio) && !rq_is_sync(rq))
+		return 0;
+
+	/*
+	 * Lookup the bfqq that this bio will be queued with. Allow
+	 * merge only if rq is queued there.
+	 * Queue lock is held here.
+	 */
+	bic = bfq_bic_lookup(bfqd, current->io_context);
+	if (bic == NULL)
+		return 0;
+
+	bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
+	/*
+	 * We take advantage of this function to perform an early merge
+	 * of the queues of possible cooperating processes.
+	 */
+	if (bfqq != NULL) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+		if (new_bfqq != NULL) {
+			bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
+			/*
+			 * If we get here, the bio will be queued in the
+			 * shared queue, i.e., new_bfqq, so use new_bfqq
+			 * to decide whether bio and rq can be merged.
+			 */
+			bfqq = new_bfqq;
+		} else
+			bfq_bfqq_increase_failed_cooperations(bfqq);
+	}
+
+	return bfqq == RQ_BFQQ(rq);
+}
+
+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq)
+{
+	if (bfqq != NULL) {
+		bfq_mark_bfqq_must_alloc(bfqq);
+		bfq_mark_bfqq_budget_new(bfqq);
+		bfq_clear_bfqq_fifo_expire(bfqq);
+
+		bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+
+		bfq_log_bfqq(bfqd, bfqq,
+			     "set_in_service_queue, cur-budget = %lu",
+			     bfqq->entity.budget);
+	}
+
+	bfqd->in_service_queue = bfqq;
+}
+
+/*
+ * Get and set a new queue for service.
+ */
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
+
+	__bfq_set_in_service_queue(bfqd, bfqq);
+	return bfqq;
+}
+
+/*
+ * If enough samples have been computed, return the current max budget
+ * stored in bfqd, which is dynamically updated according to the
+ * estimated disk peak rate; otherwise return the default max budget
+ */
+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < 194)
+		return bfq_default_max_budget;
+	else
+		return bfqd->bfq_max_budget;
+}
+
+/*
+ * Return min budget, which is a fraction of the current or default
+ * max budget (trying with 1/32)
+ */
+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < 194)
+		return bfq_default_max_budget / 32;
+	else
+		return bfqd->bfq_max_budget / 32;
+}
+
+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfqd->in_service_queue;
+	struct bfq_io_cq *bic;
+	unsigned long sl;
+
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	/* Processes have exited, don't wait. */
+	bic = bfqd->in_service_bic;
+	if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
+		return;
+
+	bfq_mark_bfqq_wait_request(bfqq);
+
+	/*
+	 * We don't want to idle for seeks, but we do want to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks. So allow a little bit of time for it to submit a new rq.
+	 *
+	 * To prevent processes with (partly) seeky workloads from
+	 * being too ill-treated, grant them a small fraction of the
+	 * assigned budget before reducing the waiting time to
+	 * BFQ_MIN_TT. This happened to help reduce latency.
+	 */
+	sl = bfqd->bfq_slice_idle;
+	/*
+	 * Unless the queue is being weight-raised or the scenario is
+	 * asymmetric, grant only minimum idle time if the queue either
+	 * has been seeky for long enough or has already proved to be
+	 * constantly seeky.
+	 */
+	if (bfq_sample_valid(bfqq->seek_samples) &&
+	    ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
+				  bfq_max_budget(bfqq->bfqd) / 8) ||
+	      bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
+	    symmetric_scenario)
+		sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+	else if (bfqq->wr_coeff > 1)
+		sl = sl * 3;
+	bfqd->last_idling_start = ktime_get();
+	mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
+	bfq_log(bfqd, "arm idle: %u/%u ms",
+		jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+}
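+
+/*
+ * Rough idea of the resulting idle windows (values are only illustrative,
+ * the actual ones depend on the bfq_slice_idle tunable): with an 8 ms
+ * slice_idle, a weight-raised queue may be waited for up to 24 ms, while
+ * a (constantly) seeky queue, in a symmetric scenario and with no weight
+ * raising, is waited for at most BFQ_MIN_TT.
+ */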
+
+/*
+ * Set the maximum time for the in-service queue to consume its
+ * budget. This prevents seeky processes from lowering the disk
+ * throughput (always guaranteed with a time slice scheme as in CFQ).
+ */
+static void bfq_set_budget_timeout(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfqd->in_service_queue;
+	unsigned int timeout_coeff;
+	if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
+		timeout_coeff = 1;
+	else
+		timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+
+	bfqd->last_budget_start = ktime_get();
+
+	bfq_clear_bfqq_budget_new(bfqq);
+	bfqq->budget_timeout = jiffies +
+		bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
+
+	bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
+		jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
+		timeout_coeff));
+}
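+
+/*
+ * Example of the timeout scaling in bfq_set_budget_timeout() (weights are
+ * hypothetical): a queue whose weight is currently raised by a factor of
+ * 30, and that is not in a soft real-time weight-raising period, gets
+ * timeout_coeff = 30, i.e., a budget timeout 30 times as long as the base
+ * sync timeout.
+ */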
+
+/*
+ * Move request from internal lists to the request queue dispatch list.
+ */
+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+	/*
+	 * For consistency, the next instruction should have been executed
+	 * after removing the request from the queue and dispatching it.
+	 * Instead, for efficiency, we execute this instruction before
+	 * bfq_remove_request() (and hence introduce a temporary
+	 * inconsistency). In fact, in a forced_dispatch, this prevents two
+	 * counters related to bfqq->dispatched from being uselessly
+	 * decremented if bfqq is not in service, and then incremented
+	 * again after incrementing bfqq->dispatched.
+	 */
+	bfqq->dispatched++;
+	bfq_remove_request(rq);
+	elv_dispatch_sort(q, rq);
+
+	if (bfq_bfqq_sync(bfqq))
+		bfqd->sync_flight++;
+}
+
+/*
+ * Return expired entry, or NULL to just start from scratch in rbtree.
+ */
+static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+{
+	struct request *rq = NULL;
+
+	if (bfq_bfqq_fifo_expire(bfqq))
+		return NULL;
+
+	bfq_mark_bfqq_fifo_expire(bfqq);
+
+	if (list_empty(&bfqq->fifo))
+		return NULL;
+
+	rq = rq_entry_fifo(bfqq->fifo.next);
+
+	if (time_before(jiffies, rq->fifo_time))
+		return NULL;
+
+	return rq;
+}
+
+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	return entity->budget - entity->service;
+}
+
+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	__bfq_bfqd_reset_in_service(bfqd);
+
+	/*
+	 * If this bfqq is shared between multiple processes, check
+	 * to make sure that those processes are still issuing I/Os
+	 * within the mean seek distance. If not, it may be time to
+	 * break the queues apart again.
+	 */
+	if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
+		bfq_mark_bfqq_split_coop(bfqq);
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		/*
+		 * Overloading the budget_timeout field to store the time
+		 * at which the queue was left with no backlog; used by
+		 * the weight-raising mechanism.
+		 */
+		bfqq->budget_timeout = jiffies;
+		bfq_del_bfqq_busy(bfqd, bfqq, 1);
+	} else {
+		bfq_activate_bfqq(bfqd, bfqq);
+		/*
+		 * Resort priority tree of potential close cooperators.
+		 */
+		bfq_rq_pos_tree_add(bfqd, bfqq);
+	}
+}
+
+/**
+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
+ * @bfqd: device data.
+ * @bfqq: queue to update.
+ * @reason: reason for expiration.
+ *
+ * Handle the feedback on @bfqq budget.  See the body for detailed
+ * comments.
+ */
+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+				     struct bfq_queue *bfqq,
+				     enum bfqq_expiration reason)
+{
+	struct request *next_rq;
+	unsigned long budget, min_budget;
+
+	budget = bfqq->max_budget;
+	min_budget = bfq_min_budget(bfqd);
+
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu",
+		bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu",
+		budget, bfq_min_budget(bfqd));
+	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
+		bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+	if (bfq_bfqq_sync(bfqq)) {
+		switch (reason) {
+		/*
+		 * Caveat: in all the following cases we trade latency
+		 * for throughput.
+		 */
+		case BFQ_BFQQ_TOO_IDLE:
+			/*
+			 * This is the only case where we may reduce
+			 * the budget: if there is no request of the
+			 * process still waiting for completion, then
+			 * we assume (tentatively) that the timer has
+			 * expired because the batch of requests of
+			 * the process could have been served with a
+			 * smaller budget.  Hence, betting that
+			 * process will behave in the same way when it
+			 * becomes backlogged again, we reduce its
+			 * next budget.  As long as we guess right,
+			 * this budget cut reduces the latency
+			 * experienced by the process.
+			 *
+			 * However, if there are still outstanding
+			 * requests, then the process may have not yet
+			 * issued its next request just because it is
+			 * still waiting for the completion of some of
+			 * the still outstanding ones.  So in this
+			 * subcase we do not reduce its budget, on the
+			 * contrary we increase it to possibly boost
+			 * the throughput, as discussed in the
+			 * comments to the BUDGET_TIMEOUT case.
+			 */
+			if (bfqq->dispatched > 0) /* still outstanding reqs */
+				budget = min(budget * 2, bfqd->bfq_max_budget);
+			else {
+				if (budget > 5 * min_budget)
+					budget -= 4 * min_budget;
+				else
+					budget = min_budget;
+			}
+			break;
+		case BFQ_BFQQ_BUDGET_TIMEOUT:
+			/*
+			 * We double the budget here because: 1) it
+			 * gives the chance to boost the throughput if
+			 * this is not a seeky process (which may have
+			 * bumped into this timeout because of, e.g.,
+			 * ZBR), 2) together with charge_full_budget
+			 * it helps give seeky processes higher
+			 * timestamps, and hence be served less
+			 * frequently.
+			 */
+			budget = min(budget * 2, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_BUDGET_EXHAUSTED:
+			/*
+			 * The process still has backlog, and did not
+			 * let either the budget timeout or the disk
+			 * idling timeout expire. Hence it is not
+			 * seeky, has a short thinktime and may be
+			 * happy with a higher budget too. So
+			 * definitely increase the budget of this good
+			 * candidate to boost the disk throughput.
+			 */
+			budget = min(budget * 4, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_NO_MORE_REQUESTS:
+		       /*
+			* Leave the budget unchanged.
+			*/
+		default:
+			return;
+		}
+	} else /* async queue */
+	    /* async queues always get the maximum possible budget
+	     * (their ability to dispatch is limited by
+	     * @bfqd->bfq_max_budget_async_rq).
+	     */
+		budget = bfqd->bfq_max_budget;
+
+	bfqq->max_budget = budget;
+
+	if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
+	    bfqq->max_budget > bfqd->bfq_max_budget)
+		bfqq->max_budget = bfqd->bfq_max_budget;
+
+	/*
+	 * Make sure that we have enough budget for the next request.
+	 * Since the finish time of the bfqq must be kept in sync with
+	 * the budget, be sure to call __bfq_bfqq_expire() after the
+	 * update.
+	 */
+	next_rq = bfqq->next_rq;
+	if (next_rq != NULL)
+		bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+					    bfq_serv_to_charge(next_rq, bfqq));
+	else
+		bfqq->entity.budget = bfqq->max_budget;
+
+	bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
+			next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
+			bfqq->entity.budget);
+}
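+
+/*
+ * Worked example of the feedback above (all figures hypothetical): with
+ * max_budget = 16384 sectors and min_budget = 512 sectors, a TOO_IDLE
+ * expiration with no outstanding requests cuts the budget to
+ * 16384 - 4 * 512 = 14336 sectors, whereas a BUDGET_TIMEOUT or
+ * BUDGET_EXHAUSTED expiration doubles or quadruples it, respectively,
+ * capped at bfqd->bfq_max_budget.
+ */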
+
+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
+{
+	unsigned long max_budget;
+
+	/*
+	 * The max_budget calculated when autotuning is equal to the
+	 * number of sectors transferred in timeout_sync at the
+	 * estimated peak rate.
+	 */
+	max_budget = (unsigned long)(peak_rate * 1000 *
+				     timeout >> BFQ_RATE_SHIFT);
+
+	return max_budget;
+}
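+
+/*
+ * Rough numeric sketch for bfq_calc_max_budget() (values are only
+ * illustrative): a device sustaining about 100 MB/s transfers roughly
+ * 0.2 sectors per usec; with a 125 ms sync timeout the autotuned max
+ * budget is then about 0.2 * 125 * 1000 = 25000 sectors (~12.8 MB with
+ * 512-byte sectors).
+ */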
+
+/*
+ * In addition to updating the peak rate, checks whether the process
+ * is "slow", and returns 1 if so. This slow flag is used, in addition
+ * to the budget timeout, to reduce the amount of service provided to
+ * seeky processes, and hence reduce their chances of lowering the
+ * throughput. See the code for more details.
+ */
+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				int compensate, enum bfqq_expiration reason)
+{
+	u64 bw, usecs, expected, timeout;
+	ktime_t delta;
+	int update = 0;
+
+	if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
+		return 0;
+
+	if (compensate)
+		delta = bfqd->last_idling_start;
+	else
+		delta = ktime_get();
+	delta = ktime_sub(delta, bfqd->last_budget_start);
+	usecs = ktime_to_us(delta);
+
+	/* Don't trust short/unrealistic values. */
+	if (usecs < 100 || usecs >= LONG_MAX)
+		return 0;
+
+	/*
+	 * Calculate the bandwidth for the last slice.  We use a 64 bit
+	 * value to store the peak rate, in sectors per usec in fixed
+	 * point math.  We do so to have enough precision in the estimate
+	 * and to avoid overflows.
+	 */
+	bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
+	do_div(bw, (unsigned long)usecs);
+
+	timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
+
+	/*
+	 * Use only long (> 20ms) intervals to filter out spikes for
+	 * the peak rate estimation.
+	 */
+	if (usecs > 20000) {
+		if (bw > bfqd->peak_rate ||
+		   (!BFQQ_SEEKY(bfqq) &&
+		    reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
+			bfq_log(bfqd, "measured bw =%llu", bw);
+			/*
+			 * To smooth oscillations use a low-pass filter with
+			 * alpha=7/8, i.e.,
+			 * new_rate = (7/8) * old_rate + (1/8) * bw
+			 */
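+			/*
+			 * Numeric sketch (hypothetical values): with an
+			 * old peak_rate of 800 and bw = 1600, both in
+			 * the fixed-point sectors/usec units used here,
+			 * the filter yields 800 * 7 / 8 + 1600 / 8 =
+			 * 700 + 200 = 900.
+			 */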
+			do_div(bw, 8);
+			if (bw == 0)
+				return 0;
+			bfqd->peak_rate *= 7;
+			do_div(bfqd->peak_rate, 8);
+			bfqd->peak_rate += bw;
+			update = 1;
+			bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
+		}
+
+		update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
+
+		if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
+			bfqd->peak_rate_samples++;
+
+		if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
+		    update) {
+			int dev_type = blk_queue_nonrot(bfqd->queue);
+			if (bfqd->bfq_user_max_budget == 0) {
+				bfqd->bfq_max_budget =
+					bfq_calc_max_budget(bfqd->peak_rate,
+							    timeout);
+				bfq_log(bfqd, "new max_budget=%lu",
+					bfqd->bfq_max_budget);
+			}
+			if (bfqd->device_speed == BFQ_BFQD_FAST &&
+			    bfqd->peak_rate < device_speed_thresh[dev_type]) {
+				bfqd->device_speed = BFQ_BFQD_SLOW;
+				bfqd->RT_prod = R_slow[dev_type] *
+						T_slow[dev_type];
+			} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
+			    bfqd->peak_rate > device_speed_thresh[dev_type]) {
+				bfqd->device_speed = BFQ_BFQD_FAST;
+				bfqd->RT_prod = R_fast[dev_type] *
+						T_fast[dev_type];
+			}
+		}
+	}
+
+	/*
+	 * If the process has been served for too short a time
+	 * interval to let its possible sequential accesses prevail over
+	 * the initial seek time needed to move the disk head onto the
+	 * first sector it requested, then give the process a chance
+	 * and for the moment return false.
+	 */
+	if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
+		return 0;
+
+	/*
+	 * A process is considered ``slow'' (i.e., seeky, so that we
+	 * cannot treat it fairly in the service domain, as it would
+	 * slow down the other processes too much) if, when a slice
+	 * ends for whatever reason, it has received service at a
+	 * rate that would not be high enough to complete the budget
+	 * before the budget timeout expiration.
+	 */
+	expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
+
+	/*
+	 * Caveat: processes doing IO in the slower disk zones will
+	 * tend to be slow(er) even if not seeky. And the estimated
+	 * peak rate will actually be an average over the disk
+	 * surface. Hence, to not be too harsh with unlucky processes,
+	 * we keep a budget/3 margin of safety before declaring a
+	 * process slow.
+	 */
+	return expected > (4 * bfqq->entity.budget) / 3;
+}
+
+/*
+ * To be deemed as soft real-time, an application must meet two
+ * requirements. First, the application must not require an average
+ * bandwidth higher than the approximate bandwidth required to playback or
+ * record a compressed high-definition video.
+ * The next function is invoked on the completion of the last request of a
+ * batch, to compute the next-start time instant, soft_rt_next_start, such
+ * that, if the next request of the application does not arrive before
+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
+ *
+ * The second requirement is that the request pattern of the application is
+ * isochronous, i.e., that, after issuing a request or a batch of requests,
+ * the application stops issuing new requests until all its pending requests
+ * have been completed. After that, the application may issue a new batch,
+ * and so on.
+ * For this reason the next function is invoked to compute
+ * soft_rt_next_start only for applications that meet this requirement,
+ * whereas soft_rt_next_start is set to infinity for applications that do
+ * not.
+ *
+ * Unfortunately, even a greedy application may happen to behave in an
+ * isochronous way if the CPU load is high. In fact, the application may
+ * stop issuing requests while the CPUs are busy serving other processes,
+ * then restart, then stop again for a while, and so on. In addition, if
+ * the disk achieves a low enough throughput with the request pattern
+ * issued by the application (e.g., because the request pattern is random
+ * and/or the device is slow), then the application may meet the above
+ * bandwidth requirement too. To prevent such a greedy application from
+ * being deemed soft real-time, a further rule is used in the computation of
+ * soft_rt_next_start: soft_rt_next_start must be higher than the current
+ * time plus the maximum time for which the arrival of a request is waited
+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
+ * This filters out greedy applications, as they instead issue their
+ * next request as soon as possible after the last one has been completed
+ * (in contrast, when a batch of requests is completed, a soft real-time
+ * application spends some time processing data).
+ *
+ * Unfortunately, the last filter may easily generate false positives if
+ * only bfqd->bfq_slice_idle is used as a reference time interval and one
+ * or both the following cases occur:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
+ *    than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
+ *    HZ=100.
+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
+ *    for a while, then suddenly 'jump' by several units to recover the lost
+ *    increments. This seems to happen, e.g., inside virtual machines.
+ * To address this issue, we do not use as a reference time interval just
+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
+ * particular we add the minimum number of jiffies for which the filter
+ * seems to be quite precise also in embedded systems and KVM/QEMU virtual
+ * machines.
+ */
+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+						       struct bfq_queue *bfqq)
+{
+	return max(bfqq->last_idle_bklogged +
+		   HZ * bfqq->service_from_backlogged /
+		   bfqd->bfq_wr_max_softrt_rate,
+		   jiffies + bfqq->bfqd->bfq_slice_idle + 4);
+}
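+
+/*
+ * Numeric sketch for the first term above (all values hypothetical):
+ * with HZ = 250, bfq_wr_max_softrt_rate = 7000 sectors/s and
+ * service_from_backlogged = 1400 sectors, the queue may issue its next
+ * batch 250 * 1400 / 7000 = 50 jiffies (200 ms) after last_idle_bklogged
+ * without exceeding the soft real-time bandwidth threshold.
+ */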
+
+/*
+ * Return the largest-possible time instant such that, for as long as possible,
+ * the current time will be lower than this time instant according to the macro
+ * time_is_before_jiffies().
+ */
+static inline unsigned long bfq_infinity_from_now(unsigned long now)
+{
+	return now + ULONG_MAX / 2;
+}
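+
+/*
+ * For instance, on a system with a 32-bit unsigned long this is roughly
+ * now + 2^31 jiffies, i.e., about 248 days in the future at HZ = 100: far
+ * enough that checks such as time_is_before_jiffies(soft_rt_next_start)
+ * keep failing for any realistic queue lifetime.
+ */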
+
+/**
+ * bfq_bfqq_expire - expire a queue.
+ * @bfqd: device owning the queue.
+ * @bfqq: the queue to expire.
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
+ * If the process associated to the queue is slow (i.e., seeky), or in
+ * case of budget timeout, or, finally, if it is async, we
+ * artificially charge it an entire budget (independently of the
+ * actual service it received). As a consequence, the queue will get
+ * higher timestamps than the correct ones upon reactivation, and
+ * hence it will be rescheduled as if it had received more service
+ * than what it actually received. In the end, this class of processes
+ * will receive less service in proportion to how slowly they consume
+ * their budgets (and hence how seriously they tend to lower the
+ * throughput).
+ *
+ * In contrast, when a queue expires because it has been idling for
+ * too long or because it exhausted its budget, we do not touch the
+ * amount of service it has received. Hence when the queue will be
+ * reactivated and its timestamps updated, the latter will be in sync
+ * with the actual service received by the queue until expiration.
+ *
+ * Charging a full budget to the first type of queues and the exact
+ * service to the others has the effect of using the WF2Q+ policy to
+ * schedule the former on a timeslice basis, without violating the
+ * service domain guarantees of the latter.
+ */
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    int compensate,
+			    enum bfqq_expiration reason)
+{
+	int slow;
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	/* Update disk peak rate for autotuning and check whether the
+	 * process is slow (see bfq_update_peak_rate).
+	 */
+	slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
+
+	/*
+	 * As explained above, 'punish' slow (i.e., seeky), timed-out
+	 * and async queues, to favor sequential sync workloads.
+	 *
+	 * Processes doing I/O in the slower disk zones will tend to be
+	 * slow(er) even if not seeky. Hence, since the estimated peak
+	 * rate is actually an average over the disk surface, these
+	 * processes may timeout just for bad luck. To avoid punishing
+	 * them we do not charge a full budget to a process that
+	 * succeeded in consuming at least 2/3 of its budget.
+	 */
+	if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+		     bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3))
+		bfq_bfqq_charge_full_budget(bfqq);
+
+	bfqq->service_from_backlogged += bfqq->entity.service;
+
+	if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+	    !bfq_bfqq_constantly_seeky(bfqq)) {
+		bfq_mark_bfqq_constantly_seeky(bfqq);
+		if (!blk_queue_nonrot(bfqd->queue))
+			bfqd->const_seeky_busy_in_flight_queues++;
+	}
+
+	if (reason == BFQ_BFQQ_TOO_IDLE &&
+	    bfqq->entity.service <= 2 * bfqq->entity.budget / 10)
+		bfq_clear_bfqq_IO_bound(bfqq);
+
+	if (bfqd->low_latency && bfqq->wr_coeff == 1)
+		bfqq->last_wr_start_finish = jiffies;
+
+	if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		/*
+		 * If we get here, and there are no outstanding requests,
+		 * then the request pattern is isochronous (see the comments
+		 * to the function bfq_bfqq_softrt_next_start()). Hence we
+		 * can compute soft_rt_next_start. If, instead, the queue
+		 * still has outstanding requests, then we have to wait
+		 * for the completion of all the outstanding requests to
+		 * discover whether the request pattern is actually
+		 * isochronous.
+		 */
+		if (bfqq->dispatched == 0)
+			bfqq->soft_rt_next_start =
+				bfq_bfqq_softrt_next_start(bfqd, bfqq);
+		else {
+			/*
+			 * The application is still waiting for the
+			 * completion of one or more requests:
+			 * prevent it from possibly being incorrectly
+			 * deemed as soft real-time by setting its
+			 * soft_rt_next_start to infinity. In fact,
+			 * without this assignment, the application
+			 * would be incorrectly deemed as soft
+			 * real-time if:
+			 * 1) it issued a new request before the
+			 *    completion of all its in-flight
+			 *    requests, and
+			 * 2) at that time, its soft_rt_next_start
+			 *    happened to be in the past.
+			 */
+			bfqq->soft_rt_next_start =
+				bfq_infinity_from_now(jiffies);
+			/*
+			 * Schedule an update of soft_rt_next_start to when
+			 * the task may be discovered to be isochronous.
+			 */
+			bfq_mark_bfqq_softrt_update(bfqq);
+		}
+	}
+
+	bfq_log_bfqq(bfqd, bfqq,
+		"expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
+		slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
+
+	/*
+	 * Increase, decrease or leave budget unchanged according to
+	 * reason.
+	 */
+	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+	__bfq_bfqq_expire(bfqd, bfqq);
+}
+
+/*
+ * Budget timeout is not implemented through a dedicated timer, but
+ * just checked on request arrivals and completions, as well as on
+ * idle timer expirations.
+ */
+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+{
+	if (bfq_bfqq_budget_new(bfqq) ||
+	    time_before(jiffies, bfqq->budget_timeout))
+		return 0;
+	return 1;
+}
+
+/*
+ * If we expire a queue that is waiting for the arrival of a new
+ * request, we may prevent the fictitious timestamp back-shifting that
+ * allows the guarantees of the queue to be preserved (see [1] for
+ * this tricky aspect). Hence we return true only if this condition
+ * does not hold, or if the queue is slow enough to deserve only to be
+ * kicked off for preserving a high throughput.
+*/
+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		"may_budget_timeout: wait_request %d left %d timeout %d",
+		bfq_bfqq_wait_request(bfqq),
+			bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3,
+		bfq_bfqq_budget_timeout(bfqq));
+
+	return (!bfq_bfqq_wait_request(bfqq) ||
+		bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3)
+		&&
+		bfq_bfqq_budget_timeout(bfqq);
+}
+
+/*
+ * Device idling is allowed only for the queues for which this function
+ * returns true. For this reason, the return value of this function plays a
+ * critical role for both throughput boosting and service guarantees. The
+ * return value is computed through a logical expression. In this rather
+ * long comment, we try to briefly describe all the details and motivations
+ * behind the components of this logical expression.
+ *
+ * First, the expression is false if bfqq is not sync, or if: bfqq happened
+ * to become active during a large burst of queue activations, and the
+ * pattern of requests bfqq contains boosts the throughput if bfqq is
+ * expired. In fact, queues that became active during a large burst benefit
+ * only from throughput, as discussed in the comments to bfq_handle_burst.
+ * In this respect, expiring bfqq certainly boosts the throughput on NCQ-
+ * capable flash-based devices, whereas, on rotational devices, it boosts
+ * the throughput only if bfqq contains random requests.
+ *
+ * On the opposite end, if (a) bfqq is sync, (b) the above burst-related
+ * condition does not hold, and (c) bfqq is being weight-raised, then the
+ * expression always evaluates to true, as device idling is instrumental
+ * for preserving low-latency guarantees (see [1]). If, instead, conditions
+ * (a) and (b) do hold, but (c) does not, then the expression evaluates to
+ * true only if: (1) bfqq is I/O-bound and has a non-null idle window, and
+ * (2) at least one of the following two conditions holds.
+ * The first condition is that the device is not performing NCQ, because
+ * idling the device most certainly boosts the throughput if this condition
+ * holds and bfqq is I/O-bound and has been granted a non-null idle window.
+ * The second compound condition is made of the logical AND of two components.
+ *
+ * The first component is true only if there is no weight-raised busy
+ * queue. This guarantees that the device is not idled for a sync non-
+ * weight-raised queue when there are busy weight-raised queues. The former
+ * is then expired immediately if empty. Combined with the timestamping
+ * rules of BFQ (see [1] for details), this causes sync non-weight-raised
+ * queues to get a lower number of requests served, and hence to ask for a
+ * lower number of requests from the request pool, before the busy weight-
+ * raised queues get served again.
+ *
+ * This is beneficial for the processes associated with weight-raised
+ * queues, when the request pool is saturated (e.g., in the presence of
+ * write hogs). In fact, if the processes associated with the other queues
+ * ask for requests at a lower rate, then weight-raised processes have a
+ * higher probability to get a request from the pool immediately (or at
+ * least soon) when they need one. Hence they have a higher probability to
+ * actually get a fraction of the disk throughput proportional to their
+ * high weight. This is especially true with NCQ-capable drives, which
+ * enqueue several requests in advance and further reorder internally-
+ * queued requests.
+ *
+ * In the end, mistreating non-weight-raised queues when there are busy
+ * weight-raised queues seems to mitigate starvation problems in the
+ * presence of heavy write workloads and NCQ, and hence to guarantee a
+ * higher application and system responsiveness in these hostile scenarios.
+ *
+ * If the first component of the compound condition is instead true, i.e.,
+ * there is no weight-raised busy queue, then the second component of the
+ * compound condition takes into account service-guarantee and throughput
+ * issues related to NCQ (recall that the compound condition is evaluated
+ * only if the device is detected as supporting NCQ).
+ *
+ * As for service guarantees, allowing the drive to enqueue more than one
+ * request at a time, and hence delegating de facto final scheduling
+ * decisions to the drive's internal scheduler, causes loss of control on
+ * the actual request service order. In this respect, when the drive is
+ * allowed to enqueue more than one request at a time, the service
+ * distribution enforced by the drive's internal scheduler is likely to
+ * coincide with the desired device-throughput distribution only in the
+ * following, perfectly symmetric, scenario:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ *    number of children.
+ *
+ * Even in such a scenario, sequential I/O may still receive a preferential
+ * treatment, but this is not likely to be a big issue with flash-based
+ * devices, because of their non-dramatic loss of throughput with random
+ * I/O. Things do differ with HDDs, for which additional care is taken, as
+ * explained after completing the discussion for flash-based devices.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore BFQ evaluates instead the following stronger sub-conditions,
+ * for which it is much easier to maintain the needed state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, hence no state needs
+ * to be maintained in this case.
+ *
+ * According to the above considerations, the second component of the
+ * compound condition evaluates to true if any of the above symmetry
+ * sub-conditions does not hold, or the device is not flash-based. Therefore,
+ * if also the first component is true, then idling is allowed for a sync
+ * queue. These are the only sub-conditions considered if the device is
+ * flash-based, as, for such a device, it is sensible to force idling only
+ * for service-guarantee issues. In fact, as for throughput, idling
+ * NCQ-capable flash-based devices would not boost the throughput even
+ * with sequential I/O; rather it would lower the throughput in proportion
+ * to how fast the device is. In the end, (only) if all the three
+ * sub-conditions hold and the device is flash-based, the compound
+ * condition evaluates to false and therefore no idling is performed.
+ *
+ * As already said, things change with a rotational device, where idling
+ * boosts the throughput with sequential I/O (even with NCQ). Hence, for
+ * such a device the second component of the compound condition evaluates
+ * to true also if the following additional sub-condition does not hold:
+ * the queue is constantly seeky. Unfortunately, this different behavior
+ * with respect to flash-based devices causes an additional asymmetry: if
+ * some sync queues enjoy idling and some other sync queues do not, then
+ * the latter get a low share of the device throughput, simply because the
+ * former get many requests served after being set as in service, whereas
+ * the latter do not. As a consequence, to guarantee the desired throughput
+ * distribution, on HDDs the compound expression evaluates to true (and
+ * hence device idling is performed) also if the following last symmetry
+ * condition does not hold: no other queue is benefiting from idling. Also
+ * this last condition is actually replaced with a simpler-to-maintain and
+ * stronger condition: there is no busy queue which is not constantly seeky
+ * (and hence may also benefit from idling).
+ *
+ * To sum up, when all the required symmetry and throughput-boosting
+ * sub-conditions hold, the second component of the compound condition
+ * evaluates to false, and hence no idling is performed. This helps to
+ * keep the drives' internal queues full on NCQ-capable devices, and hence
+ * to boost the throughput, while causing 'almost' no loss of service
+ * guarantees. The 'almost' follows from the fact that, if the internal
+ * queue of one such device is filled while all the sub-conditions hold,
+ * but at some point in time some sub-condition stops holding, then it may
+ * become impossible to let requests be served in the new desired order
+ * until all the requests already queued in the device have been served.
+ */
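+
+/*
+ * Informal summary of the return value computed below: a sync queue must
+ * not be expired if cond_for_expiring_in_burst does not hold and, in
+ * addition, either
+ *  1) the queue is being weight-raised, or
+ *  2) the scenario is not symmetric, or
+ *  3) the queue is IO-bound, enjoys the idle window, and
+ *     cond_for_expiring_non_wr does not hold.
+ */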
+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \
+				   bfqd->busy_in_flight_queues == \
+				   bfqd->const_seeky_busy_in_flight_queues)
+
+#define cond_for_expiring_in_burst	(bfq_bfqq_in_large_burst(bfqq) && \
+					 bfqd->hw_tag && \
+					 (blk_queue_nonrot(bfqd->queue) || \
+					  bfq_bfqq_constantly_seeky(bfqq)))
+
+/*
+ * Condition for expiring a non-weight-raised queue (and hence not idling
+ * the device).
+ */
+#define cond_for_expiring_non_wr  (bfqd->hw_tag && \
+				   (bfqd->wr_busy_queues > 0 || \
+				    (blk_queue_nonrot(bfqd->queue) || \
+				      cond_for_seeky_on_ncq_hdd)))
+
+	return bfq_bfqq_sync(bfqq) &&
+		!cond_for_expiring_in_burst &&
+		(bfqq->wr_coeff > 1 || !symmetric_scenario ||
+		 (bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_idle_window(bfqq) &&
+		  !cond_for_expiring_non_wr)
+	);
+}
+
+/*
+ * If the in-service queue is empty but sync, and the function
+ * bfq_bfqq_must_not_expire returns true, then:
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the disk must be idled to wait for the possible arrival of a new
+ *    request for the queue.
+ * See the comments to the function bfq_bfqq_must_not_expire for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_must_not_expire itself
+ * returns true.
+ */
+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+
+	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
+	       bfq_bfqq_must_not_expire(bfqq);
+}
+
+/*
+ * Select a queue for service.  If we have a current queue in service,
+ * check whether to continue servicing it, or retrieve and set a new one.
+ */
+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+	struct request *next_rq;
+	enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+
+	bfqq = bfqd->in_service_queue;
+	if (bfqq == NULL)
+		goto new_queue;
+
+	bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
+
+	if (bfq_may_expire_for_budg_timeout(bfqq) &&
+	    !timer_pending(&bfqd->idle_slice_timer) &&
+	    !bfq_bfqq_must_idle(bfqq))
+		goto expire;
+
+	next_rq = bfqq->next_rq;
+	/*
+	 * If bfqq has requests queued and it has enough budget left to
+	 * serve them, keep the queue, otherwise expire it.
+	 */
+	if (next_rq != NULL) {
+		if (bfq_serv_to_charge(next_rq, bfqq) >
+			bfq_bfqq_budget_left(bfqq)) {
+			reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
+			goto expire;
+		} else {
+			/*
+			 * The idle timer may be pending because we may
+			 * not disable disk idling even when a new request
+			 * arrives.
+			 */
+			if (timer_pending(&bfqd->idle_slice_timer)) {
+				/*
+				 * If we get here: 1) at least one new request
+				 * has arrived, but we have not disabled the
+				 * timer because the request was too small,
+				 * 2) then the block layer has unplugged
+				 * the device, causing the dispatch to be
+				 * invoked.
+				 *
+				 * Since the device is unplugged, now the
+				 * requests are probably large enough to
+				 * provide a reasonable throughput.
+				 * So we disable idling.
+				 */
+				bfq_clear_bfqq_wait_request(bfqq);
+				del_timer(&bfqd->idle_slice_timer);
+			}
+			goto keep_queue;
+		}
+	}
+
+	/*
+	 * No requests pending. However, if the in-service queue is idling
+	 * for a new request, or has requests waiting for a completion and
+	 * may idle after their completion, then keep it anyway.
+	 */
+	if (timer_pending(&bfqd->idle_slice_timer) ||
+	    (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
+		bfqq = NULL;
+		goto keep_queue;
+	}
+
+	reason = BFQ_BFQQ_NO_MORE_REQUESTS;
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, 0, reason);
+new_queue:
+	bfqq = bfq_set_in_service_queue(bfqd);
+	bfq_log(bfqd, "select_queue: new queue %d returned",
+		bfqq != NULL ? bfqq->pid : 0);
+keep_queue:
+	return bfqq;
+}
+
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+		BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
+		       entity->orig_weight * bfqq->wr_coeff);
+		if (entity->ioprio_changed)
+			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+		/*
+		 * If the queue was activated in a burst, or
+		 * too much time has elapsed from the beginning
+		 * of this weight-raising period, or the queue has
+		 * exceeded the acceptable number of cooperations,
+		 * then end weight raising.
+		 */
+		if (bfq_bfqq_in_large_burst(bfqq) ||
+		    bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
+		    time_is_before_jiffies(bfqq->last_wr_start_finish +
+					   bfqq->wr_cur_max_time)) {
+			bfqq->last_wr_start_finish = jiffies;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "wrais ending at %lu, rais_max_time %u",
+				     bfqq->last_wr_start_finish,
+				     jiffies_to_msecs(bfqq->wr_cur_max_time));
+			bfq_bfqq_end_wr(bfqq);
+		}
+	}
+	/* Update weight both if it must be raised and if it must be lowered */
+	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+		__bfq_entity_update_weight_prio(
+			bfq_entity_service_tree(entity),
+			entity);
+}
+
+/*
+ * Dispatch one request from bfqq, moving it to the request queue
+ * dispatch list.
+ */
+static int bfq_dispatch_request(struct bfq_data *bfqd,
+				struct bfq_queue *bfqq)
+{
+	int dispatched = 0;
+	struct request *rq;
+	unsigned long service_to_charge;
+
+	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	/* Follow expired path, else get first next available. */
+	rq = bfq_check_fifo(bfqq);
+	if (rq == NULL)
+		rq = bfqq->next_rq;
+	service_to_charge = bfq_serv_to_charge(rq, bfqq);
+
+	if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
+		/*
+		 * This may happen if the next rq is chosen in fifo order
+		 * instead of sector order. The budget is properly
+		 * dimensioned to be always sufficient to serve the next
+		 * request only if it is chosen in sector order. The reason
+		 * is that it would be quite inefficient and of little use
+		 * to always make sure that the budget is large enough to
+		 * serve even the possible next rq in fifo order.
+		 * In fact, requests are seldom served in fifo order.
+		 *
+		 * Expire the queue for budget exhaustion, and make sure
+		 * that the next act_budget is enough to serve the next
+		 * request, even if it comes from the fifo expired path.
+		 */
+		bfqq->next_rq = rq;
+		/*
+		 * Since this dispatch failed, make sure that
+		 * a new one will be performed.
+		 */
+		if (!bfqd->rq_in_driver)
+			bfq_schedule_dispatch(bfqd);
+		goto expire;
+	}
+
+	/* Finally, insert request into driver dispatch list. */
+	bfq_bfqq_served(bfqq, service_to_charge);
+	bfq_dispatch_insert(bfqd->queue, rq);
+
+	bfq_update_wr_data(bfqd, bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+			"dispatched %u sec req (%llu), budg left %lu",
+			blk_rq_sectors(rq),
+			(long long unsigned)blk_rq_pos(rq),
+			bfq_bfqq_budget_left(bfqq));
+
+	dispatched++;
+
+	if (bfqd->in_service_bic == NULL) {
+		atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
+		bfqd->in_service_bic = RQ_BIC(rq);
+	}
+
+	if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
+	    dispatched >= bfqd->bfq_max_budget_async_rq) ||
+	    bfq_class_idle(bfqq)))
+		goto expire;
+
+	return dispatched;
+
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
+	return dispatched;
+}
+
+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
+{
+	int dispatched = 0;
+
+	while (bfqq->next_rq != NULL) {
+		bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
+		dispatched++;
+	}
+
+	BUG_ON(!list_empty(&bfqq->fifo));
+	return dispatched;
+}
+
+/*
+ * Drain our current requests.
+ * Used for barriers and when switching io schedulers on-the-fly.
+ */
+static int bfq_forced_dispatch(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq, *n;
+	struct bfq_service_tree *st;
+	int dispatched = 0;
+
+	bfqq = bfqd->in_service_queue;
+	if (bfqq != NULL)
+		__bfq_bfqq_expire(bfqd, bfqq);
+
+	/*
+	 * Loop through classes, and be careful to leave the scheduler
+	 * in a consistent state, as feedback mechanisms and vtime
+	 * updates cannot be disabled during the process.
+	 */
+	list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
+		st = bfq_entity_service_tree(&bfqq->entity);
+
+		dispatched += __bfq_forced_dispatch_bfqq(bfqq);
+		bfqq->max_budget = bfq_max_budget(bfqd);
+
+		bfq_forget_idle(st);
+	}
+
+	BUG_ON(bfqd->busy_queues != 0);
+
+	return dispatched;
+}
+
+static int bfq_dispatch_requests(struct request_queue *q, int force)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq;
+	int max_dispatch;
+
+	bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
+	if (bfqd->busy_queues == 0)
+		return 0;
+
+	if (unlikely(force))
+		return bfq_forced_dispatch(bfqd);
+
+	bfqq = bfq_select_queue(bfqd);
+	if (bfqq == NULL)
+		return 0;
+
+	if (bfq_class_idle(bfqq))
+		max_dispatch = 1;
+
+	if (!bfq_bfqq_sync(bfqq))
+		max_dispatch = bfqd->bfq_max_budget_async_rq;
+
+	if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
+		if (bfqd->busy_queues > 1)
+			return 0;
+		if (bfqq->dispatched >= 4 * max_dispatch)
+			return 0;
+	}
+
+	if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
+		return 0;
+
+	bfq_clear_bfqq_wait_request(bfqq);
+	BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+
+	if (!bfq_dispatch_request(bfqd, bfqq))
+		return 0;
+
+	bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
+			bfq_bfqq_sync(bfqq) ? "sync" : "async");
+
+	return 1;
+}
+
+/*
+ * Task holds one reference to the queue, dropped when task exits.  Each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
+ *
+ * Queue lock must be held here.
+ */
+static void bfq_put_queue(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+
+	BUG_ON(atomic_read(&bfqq->ref) <= 0);
+
+	bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
+		     atomic_read(&bfqq->ref));
+	if (!atomic_dec_and_test(&bfqq->ref))
+		return;
+
+	BUG_ON(rb_first(&bfqq->sort_list) != NULL);
+	BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
+	BUG_ON(bfqq->entity.tree != NULL);
+	BUG_ON(bfq_bfqq_busy(bfqq));
+	BUG_ON(bfqd->in_service_queue == bfqq);
+
+	if (bfq_bfqq_sync(bfqq))
+		/*
+		 * The fact that this queue is being destroyed does not
+		 * invalidate the fact that this queue may have been
+		 * activated during the current burst. As a consequence,
+		 * although the queue does not exist anymore, and hence
+		 * needs to be removed from the burst list if it is there,
+		 * the burst size must not be decremented.
+		 */
+		hlist_del_init(&bfqq->burst_list_node);
+
+	bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
+
+	kmem_cache_free(bfq_pool, bfqq);
+}
+
+static void bfq_put_cooperator(struct bfq_queue *bfqq)
+{
+	struct bfq_queue *__bfqq, *next;
+
+	/*
+	 * If this queue was scheduled to merge with another queue, be
+	 * sure to drop the reference taken on that queue (and others in
+	 * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
+	 */
+	__bfqq = bfqq->new_bfqq;
+	while (__bfqq) {
+		if (__bfqq == bfqq)
+			break;
+		next = __bfqq->new_bfqq;
+		bfq_put_queue(__bfqq);
+		__bfqq = next;
+	}
+}
+
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	if (bfqq == bfqd->in_service_queue) {
+		__bfq_bfqq_expire(bfqd, bfqq);
+		bfq_schedule_dispatch(bfqd);
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
+		     atomic_read(&bfqq->ref));
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq);
+}
+
+static inline void bfq_init_icq(struct io_cq *icq)
+{
+	struct bfq_io_cq *bic = icq_to_bic(icq);
+
+	bic->ttime.last_end_request = jiffies;
+	/*
+	 * A newly created bic indicates that the process has just
+	 * started doing I/O, and is probably mapping into memory its
+	 * executable and libraries: it definitely needs weight raising.
+	 * There is, however, the possibility that the process performs,
+	 * for a while, I/O close to that of some other process. EQM
+	 * intercepts this behavior and may merge the queue corresponding
+	 * to the process with some other queue, BEFORE the weight of the
+	 * queue is raised. Merged queues are not weight-raised (they are
+	 * assumed to belong to processes that benefit only from high
+	 * throughput). If the merge is basically the consequence of an
+	 * accident, then the queue will be split soon and will get back
+	 * its old weight. It is therefore important to record somewhere
+	 * that this queue does need weight raising, even if it did not
+	 * get its weight raised before being merged. For this purpose, we
+	 * overload the field wr_time_left and assign 1 to it, to mark the
+	 * queue as needing weight raising.
+	 */
+	bic->wr_time_left = 1;
+}
+
+static void bfq_exit_icq(struct io_cq *icq)
+{
+	struct bfq_io_cq *bic = icq_to_bic(icq);
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+
+	if (bic->bfqq[BLK_RW_ASYNC]) {
+		bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
+		bic->bfqq[BLK_RW_ASYNC] = NULL;
+	}
+
+	if (bic->bfqq[BLK_RW_SYNC]) {
+		/*
+		 * If the bic is using a shared queue, put the reference
+		 * taken on the io_context when the bic started using a
+		 * shared bfq_queue.
+		 */
+		if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
+			put_io_context(icq->ioc);
+		bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+		bic->bfqq[BLK_RW_SYNC] = NULL;
+	}
+}
+
+/*
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ */
+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+	struct task_struct *tsk = current;
+	int ioprio_class;
+
+	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	switch (ioprio_class) {
+	default:
+		dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
+			"bfq: bad prio class %d\n", ioprio_class);
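+		/* fall through */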
+	case IOPRIO_CLASS_NONE:
+		/*
+		 * No prio set, inherit CPU scheduling settings.
+		 */
+		bfqq->entity.new_ioprio = task_nice_ioprio(tsk);
+		bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk);
+		break;
+	case IOPRIO_CLASS_RT:
+		bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT;
+		break;
+	case IOPRIO_CLASS_BE:
+		bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE;
+		break;
+	case IOPRIO_CLASS_IDLE:
+		bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE;
+		bfqq->entity.new_ioprio = 7;
+		bfq_clear_bfqq_idle_window(bfqq);
+		break;
+	}
+
+	if (bfqq->entity.new_ioprio < 0 ||
+	    bfqq->entity.new_ioprio >= IOPRIO_BE_NR) {
+		printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
+				 bfqq->entity.new_ioprio);
+		BUG();
+	}
+
+	bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->entity.new_ioprio);
+	bfqq->entity.ioprio_changed = 1;
+}
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic)
+{
+	struct bfq_data *bfqd;
+	struct bfq_queue *bfqq, *new_bfqq;
+	struct bfq_group *bfqg;
+	unsigned long uninitialized_var(flags);
+	int ioprio = bic->icq.ioc->ioprio;
+
+	bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+				   &flags);
+	/*
+	 * This condition may trigger on a newly created bic; be sure to
+	 * drop the lock before returning.
+	 */
+	if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio))
+		goto out;
+
+	bic->ioprio = ioprio;
+
+	bfqq = bic->bfqq[BLK_RW_ASYNC];
+	if (bfqq != NULL) {
+		bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
+				    sched_data);
+		new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic,
+					 GFP_ATOMIC);
+		if (new_bfqq != NULL) {
+			bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "check_ioprio_change: bfqq %p %d",
+				     bfqq, atomic_read(&bfqq->ref));
+			bfq_put_queue(bfqq);
+		}
+	}
+
+	bfqq = bic->bfqq[BLK_RW_SYNC];
+	if (bfqq != NULL)
+		bfq_set_next_ioprio_data(bfqq, bic);
+
+out:
+	bfq_put_bfqd_unlock(bfqd, &flags);
+}
+
+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_io_cq *bic, pid_t pid, int is_sync)
+{
+	RB_CLEAR_NODE(&bfqq->entity.rb_node);
+	INIT_LIST_HEAD(&bfqq->fifo);
+	INIT_HLIST_NODE(&bfqq->burst_list_node);
+
+	atomic_set(&bfqq->ref, 0);
+	bfqq->bfqd = bfqd;
+
+	if (bic)
+		bfq_set_next_ioprio_data(bfqq, bic);
+
+	if (is_sync) {
+		if (!bfq_class_idle(bfqq))
+			bfq_mark_bfqq_idle_window(bfqq);
+		bfq_mark_bfqq_sync(bfqq);
+	}
+	bfq_mark_bfqq_IO_bound(bfqq);
+
+	/* Tentative initial value to trade off between throughput and latency */
+	bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
+	bfqq->pid = pid;
+
+	bfqq->wr_coeff = 1;
+	bfqq->last_wr_start_finish = 0;
+	/*
+	 * Set to the value for which bfqq will not be deemed as
+	 * soft rt when it becomes backlogged.
+	 */
+	bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
+}
+
+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
+					      struct bfq_group *bfqg,
+					      int is_sync,
+					      struct bfq_io_cq *bic,
+					      gfp_t gfp_mask)
+{
+	struct bfq_queue *bfqq, *new_bfqq = NULL;
+
+retry:
+	/* bic always exists here */
+	bfqq = bic_to_bfqq(bic, is_sync);
+
+	/*
+	 * If we originally fell back to the OOM bfqq, always retry the
+	 * allocation, since the OOM condition should only be temporary.
+	 */
+	if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
+		bfqq = NULL;
+		if (new_bfqq != NULL) {
+			bfqq = new_bfqq;
+			new_bfqq = NULL;
+		} else if (gfp_mask & __GFP_WAIT) {
+			spin_unlock_irq(bfqd->queue->queue_lock);
+			new_bfqq = kmem_cache_alloc_node(bfq_pool,
+					gfp_mask | __GFP_ZERO,
+					bfqd->queue->node);
+			spin_lock_irq(bfqd->queue->queue_lock);
+			if (new_bfqq != NULL)
+				goto retry;
+		} else {
+			bfqq = kmem_cache_alloc_node(bfq_pool,
+					gfp_mask | __GFP_ZERO,
+					bfqd->queue->node);
+		}
+
+		if (bfqq != NULL) {
+			bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+				      is_sync);
+			bfq_init_entity(&bfqq->entity, bfqg);
+			bfq_log_bfqq(bfqd, bfqq, "allocated");
+		} else {
+			bfqq = &bfqd->oom_bfqq;
+			bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+		}
+	}
+
+	if (new_bfqq != NULL)
+		kmem_cache_free(bfq_pool, new_bfqq);
+
+	return bfqq;
+}
+
+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+					       struct bfq_group *bfqg,
+					       int ioprio_class, int ioprio)
+{
+	switch (ioprio_class) {
+	case IOPRIO_CLASS_RT:
+		return &bfqg->async_bfqq[0][ioprio];
+	case IOPRIO_CLASS_NONE:
+		ioprio = IOPRIO_NORM;
+		/* fall through */
+	case IOPRIO_CLASS_BE:
+		return &bfqg->async_bfqq[1][ioprio];
+	case IOPRIO_CLASS_IDLE:
+		return &bfqg->async_idle_bfqq;
+	default:
+		BUG();
+	}
+}
+
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bfq_group *bfqg, int is_sync,
+				       struct bfq_io_cq *bic, gfp_t gfp_mask)
+{
+	const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+	const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	struct bfq_queue **async_bfqq = NULL;
+	struct bfq_queue *bfqq = NULL;
+
+	if (!is_sync) {
+		async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+						  ioprio);
+		bfqq = *async_bfqq;
+	}
+
+	if (bfqq == NULL)
+		bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
+
+	/*
+	 * Pin the queue now that it's allocated, scheduler exit will
+	 * prune it.
+	 */
+	if (!is_sync && *async_bfqq == NULL) {
+		atomic_inc(&bfqq->ref);
+		bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
+			     bfqq, atomic_read(&bfqq->ref));
+		*async_bfqq = bfqq;
+	}
+
+	atomic_inc(&bfqq->ref);
+	bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
+		     atomic_read(&bfqq->ref));
+	return bfqq;
+}
+
+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+				    struct bfq_io_cq *bic)
+{
+	unsigned long elapsed = jiffies - bic->ttime.last_end_request;
+	unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
+
+	bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
+	bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
+	bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
+				bic->ttime.ttime_samples;
+}
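+
+/*
+ * Rough numeric example for the decayed averages above: ttime_samples
+ * converges towards 256, so ttime_mean approximates an exponentially
+ * weighted (7/8) mean of the observed think times. E.g., with a steady
+ * think time of 8 jiffies, ttime_total converges to about 256 * 8 = 2048
+ * and ttime_mean to about 8.
+ */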
+
+static void bfq_update_io_seektime(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq,
+				   struct request *rq)
+{
+	sector_t sdist;
+	u64 total;
+
+	if (bfqq->last_request_pos < blk_rq_pos(rq))
+		sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
+	else
+		sdist = bfqq->last_request_pos - blk_rq_pos(rq);
+
+	/*
+	 * Don't allow the seek distance to get too large from the
+	 * odd fragment, pagein, etc.
+	 */
+	if (bfqq->seek_samples == 0) /* first request, not really a seek */
+		sdist = 0;
+	else if (bfqq->seek_samples <= 60) /* second & third seek */
+		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
+	else
+		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
+
+	bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
+	bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
+	total = bfqq->seek_total + (bfqq->seek_samples/2);
+	do_div(total, bfqq->seek_samples);
+	bfqq->seek_mean = (sector_t)total;
+
+	bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
+			(u64)bfqq->seek_mean);
+}
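+
+/*
+ * Note on the clamping above: it bounds the influence of isolated large
+ * seeks. While few samples have been collected (seek_samples <= 60), a new
+ * distance is capped at 4 * seek_mean + 2*1024*1024; afterwards the cap
+ * tightens to 4 * seek_mean + 2*1024*64 (all values in sectors, as returned
+ * by blk_rq_pos()). seek_mean itself is a 7/8 decayed average, computed as
+ * for the think-time statistics.
+ */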
+
+/*
+ * Disable idle window if the process thinks too long or seeks so much that
+ * it doesn't matter.
+ */
+static void bfq_update_idle_window(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq,
+				   struct bfq_io_cq *bic)
+{
+	int enable_idle;
+
+	/* Don't idle for async or idle io prio class. */
+	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+		return;
+
+	/* Idle window just restored, statistics are meaningless. */
+	if (bfq_bfqq_just_split(bfqq))
+		return;
+
+	enable_idle = bfq_bfqq_idle_window(bfqq);
+
+	if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+	    bfqd->bfq_slice_idle == 0 ||
+		(bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
+			bfqq->wr_coeff == 1))
+		enable_idle = 0;
+	else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
+		if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
+			bfqq->wr_coeff == 1)
+			enable_idle = 0;
+		else
+			enable_idle = 1;
+	}
+	bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
+		enable_idle);
+
+	if (enable_idle)
+		bfq_mark_bfqq_idle_window(bfqq);
+	else
+		bfq_clear_bfqq_idle_window(bfqq);
+}
+
+/*
+ * Called when a new fs request (rq) is added to bfqq.  Check if there's
+ * something we should do about it.
+ */
+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			    struct request *rq)
+{
+	struct bfq_io_cq *bic = RQ_BIC(rq);
+
+	if (rq->cmd_flags & REQ_META)
+		bfqq->meta_pending++;
+
+	bfq_update_io_thinktime(bfqd, bic);
+	bfq_update_io_seektime(bfqd, bfqq, rq);
+	if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) {
+		bfq_clear_bfqq_constantly_seeky(bfqq);
+		if (!blk_queue_nonrot(bfqd->queue)) {
+			BUG_ON(!bfqd->const_seeky_busy_in_flight_queues);
+			bfqd->const_seeky_busy_in_flight_queues--;
+		}
+	}
+	if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+	    !BFQQ_SEEKY(bfqq))
+		bfq_update_idle_window(bfqd, bfqq, bic);
+	bfq_clear_bfqq_just_split(bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+		     bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
+		     (long long unsigned)bfqq->seek_mean);
+
+	bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+	if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
+		int small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
+				blk_rq_sectors(rq) < 32;
+		int budget_timeout = bfq_bfqq_budget_timeout(bfqq);
+
+		/*
+		 * There is just this request queued: if the request
+		 * is small and the queue is not to be expired, then
+		 * just exit.
+		 *
+		 * In this way, if the disk is being idled to wait for
+		 * a new request from the in-service queue, we avoid
+		 * unplugging the device and committing the disk to serve
+		 * just a small request. On the contrary, we wait for
+		 * the block layer to decide when to unplug the device:
+		 * hopefully, new requests will be merged to this one
+		 * quickly, then the device will be unplugged and
+		 * larger requests will be dispatched.
+		 */
+		if (small_req && !budget_timeout)
+			return;
+
+		/*
+		 * A large enough request arrived, or the queue is to
+		 * be expired: in both cases disk idling is to be
+		 * stopped, so clear wait_request flag and reset
+		 * timer.
+		 */
+		bfq_clear_bfqq_wait_request(bfqq);
+		del_timer(&bfqd->idle_slice_timer);
+
+		/*
+		 * The queue is not empty, because a new request just
+		 * arrived. Hence we can safely expire the queue, in
+		 * case of budget timeout, without risking that the
+		 * timestamps of the queue are not updated correctly.
+		 * See [1] for more details.
+		 */
+		if (budget_timeout)
+			bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
+
+		/*
+		 * Let the request rip immediately, or let a new queue be
+		 * selected if bfqq has just been expired.
+		 */
+		__blk_run_queue(bfqd->queue);
+	}
+}
+
+static void bfq_insert_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+
+	assert_spin_locked(bfqd->queue->queue_lock);
+
+	/*
+	 * An unplug may trigger a requeue of a request from the device
+	 * driver: make sure we are in process context while trying to
+	 * merge two bfq_queues.
+	 */
+	if (!in_interrupt()) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+		if (new_bfqq != NULL) {
+			if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+				new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+			/*
+			 * Release the request's reference to the old bfqq
+			 * and make sure one is taken to the shared queue.
+			 */
+			new_bfqq->allocated[rq_data_dir(rq)]++;
+			bfqq->allocated[rq_data_dir(rq)]--;
+			atomic_inc(&new_bfqq->ref);
+			bfq_put_queue(bfqq);
+			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+						bfqq, new_bfqq);
+			rq->elv.priv[1] = new_bfqq;
+			bfqq = new_bfqq;
+		} else
+			bfq_bfqq_increase_failed_cooperations(bfqq);
+	}
+
+	bfq_add_request(rq);
+
+	/*
+	 * Here a newly-created bfq_queue has already started a weight-raising
+	 * period: clear raising_time_left to prevent bfq_bfqq_save_state()
+	 * from assigning it a full weight-raising period. See the detailed
+	 * comments about this field in bfq_init_icq().
+	 */
+	if (bfqq->bic != NULL)
+		bfqq->bic->wr_time_left = 0;
+	rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+	list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+	bfq_rq_enqueued(bfqd, bfqq, rq);
+}
+
+static void bfq_update_hw_tag(struct bfq_data *bfqd)
+{
+	bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
+				     bfqd->rq_in_driver);
+
+	if (bfqd->hw_tag == 1)
+		return;
+
+	/*
+	 * This sample is valid if the number of outstanding requests
+	 * is large enough to allow a queueing behavior.  Note that the
+	 * sum is not exact, as it's not taking into account deactivated
+	 * requests.
+	 */
+	if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
+		return;
+
+	if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
+		return;
+
+	bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
+	bfqd->max_rq_in_driver = 0;
+	bfqd->hw_tag_samples = 0;
+}
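+
+/*
+ * Note: bfqd->hw_tag starts at -1 (unknown, see bfq_init_queue). After
+ * BFQ_HW_QUEUE_SAMPLES valid samples it is set to 1 if the driver was ever
+ * observed holding more than BFQ_HW_QUEUE_THRESHOLD requests, and to 0
+ * otherwise; once it becomes 1 it is never re-evaluated (early return above).
+ */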
+
+static void bfq_completed_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	bool sync = bfq_bfqq_sync(bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
+		     blk_rq_sectors(rq), sync);
+
+	bfq_update_hw_tag(bfqd);
+
+	BUG_ON(!bfqd->rq_in_driver);
+	BUG_ON(!bfqq->dispatched);
+	bfqd->rq_in_driver--;
+	bfqq->dispatched--;
+
+	if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+		if (!blk_queue_nonrot(bfqd->queue)) {
+			BUG_ON(!bfqd->busy_in_flight_queues);
+			bfqd->busy_in_flight_queues--;
+			if (bfq_bfqq_constantly_seeky(bfqq)) {
+				BUG_ON(!bfqd->
+					const_seeky_busy_in_flight_queues);
+				bfqd->const_seeky_busy_in_flight_queues--;
+			}
+		}
+	}
+
+	if (sync) {
+		bfqd->sync_flight--;
+		RQ_BIC(rq)->ttime.last_end_request = jiffies;
+	}
+
+	/*
+	 * If we are waiting to discover whether the request pattern of the
+	 * task associated with the queue is actually isochronous, and
+	 * both requisites for this condition to hold are satisfied, then
+	 * compute soft_rt_next_start (see the comments to the function
+	 * bfq_bfqq_softrt_next_start()).
+	 */
+	if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list))
+		bfqq->soft_rt_next_start =
+			bfq_bfqq_softrt_next_start(bfqd, bfqq);
+
+	/*
+	 * If this is the in-service queue, check if it needs to be expired,
+	 * or if we want to idle in case it has no pending requests.
+	 */
+	if (bfqd->in_service_queue == bfqq) {
+		if (bfq_bfqq_budget_new(bfqq))
+			bfq_set_budget_timeout(bfqd);
+
+		if (bfq_bfqq_must_idle(bfqq)) {
+			bfq_arm_slice_timer(bfqd);
+			goto out;
+		} else if (bfq_may_expire_for_budg_timeout(bfqq))
+			bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
+		else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
+			 (bfqq->dispatched == 0 ||
+			  !bfq_bfqq_must_not_expire(bfqq)))
+			bfq_bfqq_expire(bfqd, bfqq, 0,
+					BFQ_BFQQ_NO_MORE_REQUESTS);
+	}
+
+	if (!bfqd->rq_in_driver)
+		bfq_schedule_dispatch(bfqd);
+
+out:
+	return;
+}
+
+static inline int __bfq_may_queue(struct bfq_queue *bfqq)
+{
+	if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
+		bfq_clear_bfqq_must_alloc(bfqq);
+		return ELV_MQUEUE_MUST;
+	}
+
+	return ELV_MQUEUE_MAY;
+}
+
+static int bfq_may_queue(struct request_queue *q, int rw)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct task_struct *tsk = current;
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq;
+
+	/*
+	 * Don't force setup of a queue from here, as a call to may_queue
+	 * does not necessarily imply that a request actually will be
+	 * queued. So just lookup a possibly existing queue, or return
+	 * 'may queue' if that fails.
+	 */
+	bic = bfq_bic_lookup(bfqd, tsk->io_context);
+	if (bic == NULL)
+		return ELV_MQUEUE_MAY;
+
+	bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
+	if (bfqq != NULL)
+		return __bfq_may_queue(bfqq);
+
+	return ELV_MQUEUE_MAY;
+}
+
+/*
+ * Queue lock held here.
+ */
+static void bfq_put_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+	if (bfqq != NULL) {
+		const int rw = rq_data_dir(rq);
+
+		BUG_ON(!bfqq->allocated[rw]);
+		bfqq->allocated[rw]--;
+
+		rq->elv.priv[0] = NULL;
+		rq->elv.priv[1] = NULL;
+
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
+			     bfqq, atomic_read(&bfqq->ref));
+		bfq_put_queue(bfqq);
+	}
+}
+
+/*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+ * was the last process referring to said bfqq.
+ */
+static struct bfq_queue *
+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+	put_io_context(bic->icq.ioc);
+
+	if (bfqq_process_refs(bfqq) == 1) {
+		bfqq->pid = current->pid;
+		bfq_clear_bfqq_coop(bfqq);
+		bfq_clear_bfqq_split_coop(bfqq);
+		return bfqq;
+	}
+
+	bic_set_bfqq(bic, NULL, 1);
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq);
+	return NULL;
+}
+
+/*
+ * Allocate bfq data structures associated with this request.
+ */
+static int bfq_set_request(struct request_queue *q, struct request *rq,
+			   struct bio *bio, gfp_t gfp_mask)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+	const int rw = rq_data_dir(rq);
+	const int is_sync = rq_is_sync(rq);
+	struct bfq_queue *bfqq;
+	struct bfq_group *bfqg;
+	unsigned long flags;
+	bool split = false;
+
+	might_sleep_if(gfp_mask & __GFP_WAIT);
+
+	bfq_check_ioprio_change(bic);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (bic == NULL)
+		goto queue_fail;
+
+	bfqg = bfq_bic_update_cgroup(bic);
+
+new_queue:
+	bfqq = bic_to_bfqq(bic, is_sync);
+	if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
+		bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
+		bic_set_bfqq(bic, bfqq, is_sync);
+		if (split && is_sync) {
+			if ((bic->was_in_burst_list && bfqd->large_burst) ||
+			    bic->saved_in_large_burst)
+				bfq_mark_bfqq_in_large_burst(bfqq);
+			else {
+				bfq_clear_bfqq_in_large_burst(bfqq);
+				if (bic->was_in_burst_list)
+					hlist_add_head(&bfqq->burst_list_node,
+						       &bfqd->burst_list);
+			}
+		}
+	} else {
+		/* If the queue was seeky for too long, break it apart. */
+		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+			bfqq = bfq_split_bfqq(bic, bfqq);
+			split = true;
+			if (!bfqq)
+				goto new_queue;
+		}
+	}
+
+	bfqq->allocated[rw]++;
+	atomic_inc(&bfqq->ref);
+	bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
+		     atomic_read(&bfqq->ref));
+
+	rq->elv.priv[0] = bic;
+	rq->elv.priv[1] = bfqq;
+
+	/*
+	 * If a bfq_queue has only one process reference, it is owned
+	 * by only one bfq_io_cq: we can set the bic field of the
+	 * bfq_queue to the address of that structure. Also, if the
+	 * queue has just been split, mark a flag so that the
+	 * information is available to the other scheduler hooks.
+	 */
+	if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+		bfqq->bic = bic;
+		if (split) {
+			bfq_mark_bfqq_just_split(bfqq);
+			/*
+			 * If the queue has just been split from a shared
+			 * queue, restore the idle window and the possible
+			 * weight raising period.
+			 */
+			bfq_bfqq_resume_state(bfqq, bic);
+		}
+	}
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+
+queue_fail:
+	bfq_schedule_dispatch(bfqd);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 1;
+}
+
+static void bfq_kick_queue(struct work_struct *work)
+{
+	struct bfq_data *bfqd =
+		container_of(work, struct bfq_data, unplug_work);
+	struct request_queue *q = bfqd->queue;
+
+	spin_lock_irq(q->queue_lock);
+	__blk_run_queue(q);
+	spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * Handler of the expiration of the timer running if the in-service queue
+ * is idling inside its time slice.
+ */
+static void bfq_idle_slice_timer(unsigned long data)
+{
+	struct bfq_data *bfqd = (struct bfq_data *)data;
+	struct bfq_queue *bfqq;
+	unsigned long flags;
+	enum bfqq_expiration reason;
+
+	spin_lock_irqsave(bfqd->queue->queue_lock, flags);
+
+	bfqq = bfqd->in_service_queue;
+	/*
+	 * Theoretical race here: the in-service queue can be NULL or
+	 * different from the queue that was idling if the timer handler
+	 * spins on the queue_lock and a new request arrives for the
+	 * current queue and there is a full dispatch cycle that changes
+	 * the in-service queue.  This can hardly happen, but in the worst
+	 * case we just expire a queue too early.
+	 */
+	if (bfqq != NULL) {
+		bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
+		if (bfq_bfqq_budget_timeout(bfqq))
+			/*
+			 * Also here the queue can be safely expired
+			 * for budget timeout without wasting
+			 * guarantees
+			 */
+			reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+		else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
+			/*
+			 * The queue may not be empty upon timer expiration,
+			 * because we may not disable the timer when the
+			 * first request of the in-service queue arrives
+			 * during disk idling.
+			 */
+			reason = BFQ_BFQQ_TOO_IDLE;
+		else
+			goto schedule_dispatch;
+
+		bfq_bfqq_expire(bfqd, bfqq, 1, reason);
+	}
+
+schedule_dispatch:
+	bfq_schedule_dispatch(bfqd);
+
+	spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
+}
+
+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+{
+	del_timer_sync(&bfqd->idle_slice_timer);
+	cancel_work_sync(&bfqd->unplug_work);
+}
+
+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+					struct bfq_queue **bfqq_ptr)
+{
+	struct bfq_group *root_group = bfqd->root_group;
+	struct bfq_queue *bfqq = *bfqq_ptr;
+
+	bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
+	if (bfqq != NULL) {
+		bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
+		bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
+			     bfqq, atomic_read(&bfqq->ref));
+		bfq_put_queue(bfqq);
+		*bfqq_ptr = NULL;
+	}
+}
+
+/*
+ * Release all the bfqg references to its async queues.  If we are
+ * deallocating the group, these queues may still contain requests, so
+ * we reparent them to the root cgroup (i.e., the only one that will
+ * exist for sure until all the requests on a device are gone).
+ */
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			__bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
+
+	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+}
+
+static void bfq_exit_queue(struct elevator_queue *e)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	struct request_queue *q = bfqd->queue;
+	struct bfq_queue *bfqq, *n;
+
+	bfq_shutdown_timer_wq(bfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	BUG_ON(bfqd->in_service_queue != NULL);
+	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+		bfq_deactivate_bfqq(bfqd, bfqq, 0);
+
+	bfq_disconnect_groups(bfqd);
+	spin_unlock_irq(q->queue_lock);
+
+	bfq_shutdown_timer_wq(bfqd);
+
+	synchronize_rcu();
+
+	BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+
+	bfq_free_root_group(bfqd);
+	kfree(bfqd);
+}
+
+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+{
+	struct bfq_group *bfqg;
+	struct bfq_data *bfqd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (eq == NULL)
+		return -ENOMEM;
+
+	bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
+	if (bfqd == NULL) {
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+	eq->elevator_data = bfqd;
+
+	/*
+	 * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+	atomic_inc(&bfqd->oom_bfqq.ref);
+	bfqd->oom_bfqq.entity.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+	bfqd->oom_bfqq.entity.new_ioprio_class = IOPRIO_CLASS_BE;
+	bfqd->oom_bfqq.entity.new_weight =
+		bfq_ioprio_to_weight(bfqd->oom_bfqq.entity.new_ioprio);
+	/*
+	 * Trigger weight initialization, according to ioprio, at the
+	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+	 * class won't be changed any more.
+	 */
+	bfqd->oom_bfqq.entity.ioprio_changed = 1;
+
+	bfqd->queue = q;
+
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
+
+	bfqg = bfq_alloc_root_group(bfqd, q->node);
+	if (bfqg == NULL) {
+		kfree(bfqd);
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+
+	bfqd->root_group = bfqg;
+	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+#ifdef CONFIG_CGROUP_BFQIO
+	bfqd->active_numerous_groups = 0;
+#endif
+
+	init_timer(&bfqd->idle_slice_timer);
+	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+	bfqd->idle_slice_timer.data = (unsigned long)bfqd;
+
+	bfqd->rq_pos_tree = RB_ROOT;
+	bfqd->queue_weights_tree = RB_ROOT;
+	bfqd->group_weights_tree = RB_ROOT;
+
+	INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
+
+	INIT_LIST_HEAD(&bfqd->active_list);
+	INIT_LIST_HEAD(&bfqd->idle_list);
+	INIT_HLIST_HEAD(&bfqd->burst_list);
+
+	bfqd->hw_tag = -1;
+
+	bfqd->bfq_max_budget = bfq_default_max_budget;
+
+	bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
+	bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
+	bfqd->bfq_back_max = bfq_back_max;
+	bfqd->bfq_back_penalty = bfq_back_penalty;
+	bfqd->bfq_slice_idle = bfq_slice_idle;
+	bfqd->bfq_class_idle_last_service = 0;
+	bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
+	bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
+	bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
+
+	bfqd->bfq_coop_thresh = 2;
+	bfqd->bfq_failed_cooperations = 7000;
+	bfqd->bfq_requests_within_timer = 120;
+
+	bfqd->bfq_large_burst_thresh = 11;
+	bfqd->bfq_burst_interval = msecs_to_jiffies(500);
+
+	bfqd->low_latency = true;
+
+	bfqd->bfq_wr_coeff = 20;
+	bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+	bfqd->bfq_wr_max_time = 0;
+	bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+	bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
+	bfqd->bfq_wr_max_softrt_rate = 7000; /*
+					      * Approximate rate required
+					      * to play back or record a
+					      * high-definition compressed
+					      * video.
+					      */
+	bfqd->wr_busy_queues = 0;
+	bfqd->busy_in_flight_queues = 0;
+	bfqd->const_seeky_busy_in_flight_queues = 0;
+
+	/*
+	 * Begin by assuming, optimistically, that the device peak rate is
+	 * equal to the highest reference rate.
+	 */
+	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+			T_fast[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)];
+	bfqd->device_speed = BFQ_BFQD_FAST;
+
+	return 0;
+}
+
+static void bfq_slab_kill(void)
+{
+	if (bfq_pool != NULL)
+		kmem_cache_destroy(bfq_pool);
+}
+
+static int __init bfq_slab_setup(void)
+{
+	bfq_pool = KMEM_CACHE(bfq_queue, 0);
+	if (bfq_pool == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static ssize_t bfq_var_show(unsigned int var, char *page)
+{
+	return sprintf(page, "%d\n", var);
+}
+
+static ssize_t bfq_var_store(unsigned long *var, const char *page,
+			     size_t count)
+{
+	unsigned long new_val;
+	int ret = kstrtoul(page, 10, &new_val);
+
+	if (ret == 0)
+		*var = new_val;
+
+	return count;
+}
+
+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
+		       jiffies_to_msecs(bfqd->bfq_wr_max_time) :
+		       jiffies_to_msecs(bfq_wr_duration(bfqd)));
+}
+
+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd = e->elevator_data;
+	ssize_t num_char = 0;
+
+	num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
+			    bfqd->queued);
+
+	spin_lock_irq(bfqd->queue->queue_lock);
+
+	num_char += sprintf(page + num_char, "Active:\n");
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+			"pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n",
+			bfqq->pid,
+			bfqq->entity.weight,
+			bfqq->queued[0],
+			bfqq->queued[1],
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	num_char += sprintf(page + num_char, "Idle:\n");
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+			"pid%d: weight %hu, dur %d/%u\n",
+			bfqq->pid,
+			bfqq->entity.weight,
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	spin_unlock_irq(bfqd->queue->queue_lock);
+
+	return num_char;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned int __data = __VAR;					\
+	if (__CONV)							\
+		__data = jiffies_to_msecs(__data);			\
+	return bfq_var_show(__data, (page));				\
+}
+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+SHOW_FUNCTION(bfq_max_budget_async_rq_show,
+	      bfqd->bfq_max_budget_async_rq, 0);
+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1);
+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
+	1);
+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
+static ssize_t								\
+__FUNC(struct elevator_queue *e, const char *page, size_t count)	\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned long uninitialized_var(__data);			\
+	int ret = bfq_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	if (__CONV)							\
+		*(__PTR) = msecs_to_jiffies(__data);			\
+	else								\
+		*(__PTR) = __data;					\
+	return ret;							\
+}
+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+		INT_MAX, 0);
+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
+		1, INT_MAX, 0);
+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
+		1);
+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store,
+		&bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
+		INT_MAX, 0);
+#undef STORE_FUNCTION
+
+/* do nothing for the moment */
+static ssize_t bfq_weights_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	return count;
+}
+
+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
+{
+	u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
+
+	if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
+		return bfq_calc_max_budget(bfqd->peak_rate, timeout);
+	else
+		return bfq_default_max_budget;
+}
+
+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data == 0)
+		bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
+	else {
+		if (__data > INT_MAX)
+			__data = INT_MAX;
+		bfqd->bfq_max_budget = __data;
+	}
+
+	bfqd->bfq_user_max_budget = __data;
+
+	return ret;
+}
+
+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+				      const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data < 1)
+		__data = 1;
+	else if (__data > INT_MAX)
+		__data = INT_MAX;
+
+	bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
+	if (bfqd->bfq_user_max_budget == 0)
+		bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
+
+	return ret;
+}
+
+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (__data == 0 && bfqd->low_latency != 0)
+		bfq_end_wr(bfqd);
+	bfqd->low_latency = __data;
+
+	return ret;
+}
+
+#define BFQ_ATTR(name) \
+	__ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
+
+static struct elv_fs_entry bfq_attrs[] = {
+	BFQ_ATTR(fifo_expire_sync),
+	BFQ_ATTR(fifo_expire_async),
+	BFQ_ATTR(back_seek_max),
+	BFQ_ATTR(back_seek_penalty),
+	BFQ_ATTR(slice_idle),
+	BFQ_ATTR(max_budget),
+	BFQ_ATTR(max_budget_async_rq),
+	BFQ_ATTR(timeout_sync),
+	BFQ_ATTR(timeout_async),
+	BFQ_ATTR(low_latency),
+	BFQ_ATTR(wr_coeff),
+	BFQ_ATTR(wr_max_time),
+	BFQ_ATTR(wr_rt_max_time),
+	BFQ_ATTR(wr_min_idle_time),
+	BFQ_ATTR(wr_min_inter_arr_async),
+	BFQ_ATTR(wr_max_softrt_rate),
+	BFQ_ATTR(weights),
+	__ATTR_NULL
+};
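+
+/*
+ * Usage sketch: once bfq is the active elevator for a device, the
+ * attributes above are exposed under /sys/block/<dev>/queue/iosched/.
+ * For example, assuming a device named sda:
+ *   echo bfq > /sys/block/sda/queue/scheduler
+ *   cat /sys/block/sda/queue/iosched/slice_idle
+ *   echo 0 > /sys/block/sda/queue/iosched/low_latency
+ */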
+
+static struct elevator_type iosched_bfq = {
+	.ops = {
+		.elevator_merge_fn =		bfq_merge,
+		.elevator_merged_fn =		bfq_merged_request,
+		.elevator_merge_req_fn =	bfq_merged_requests,
+		.elevator_allow_merge_fn =	bfq_allow_merge,
+		.elevator_dispatch_fn =		bfq_dispatch_requests,
+		.elevator_add_req_fn =		bfq_insert_request,
+		.elevator_activate_req_fn =	bfq_activate_request,
+		.elevator_deactivate_req_fn =	bfq_deactivate_request,
+		.elevator_completed_req_fn =	bfq_completed_request,
+		.elevator_former_req_fn =	elv_rb_former_request,
+		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_init_icq_fn =		bfq_init_icq,
+		.elevator_exit_icq_fn =		bfq_exit_icq,
+		.elevator_set_req_fn =		bfq_set_request,
+		.elevator_put_req_fn =		bfq_put_request,
+		.elevator_may_queue_fn =	bfq_may_queue,
+		.elevator_init_fn =		bfq_init_queue,
+		.elevator_exit_fn =		bfq_exit_queue,
+	},
+	.icq_size =		sizeof(struct bfq_io_cq),
+	.icq_align =		__alignof__(struct bfq_io_cq),
+	.elevator_attrs =	bfq_attrs,
+	.elevator_name =	"bfq",
+	.elevator_owner =	THIS_MODULE,
+};
+
+static int __init bfq_init(void)
+{
+	/*
+	 * Can be 0 on HZ < 1000 setups.
+	 */
+	if (bfq_slice_idle == 0)
+		bfq_slice_idle = 1;
+
+	if (bfq_timeout_async == 0)
+		bfq_timeout_async = 1;
+
+	if (bfq_slab_setup())
+		return -ENOMEM;
+
+	/*
+	 * Times to load large popular applications for the typical systems
+	 * installed on the reference devices (see the comments before the
+	 * definitions of the two arrays).
+	 */
+	T_slow[0] = msecs_to_jiffies(2600);
+	T_slow[1] = msecs_to_jiffies(1000);
+	T_fast[0] = msecs_to_jiffies(5500);
+	T_fast[1] = msecs_to_jiffies(2000);
+
+	/*
+	 * Thresholds that determine the switch between speed classes (see
+	 * the comments before the definition of the array).
+	 */
+	device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
+	device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
+
+	elv_register(&iosched_bfq);
+	pr_info("BFQ I/O-scheduler: v7r8\n");
+
+	return 0;
+}
+
+static void __exit bfq_exit(void)
+{
+	elv_unregister(&iosched_bfq);
+	bfq_slab_kill();
+}
+
+module_init(bfq_init);
+module_exit(bfq_exit);
+
+MODULE_AUTHOR("Fabio Checconi, Paolo Valente");
+MODULE_LICENSE("GPL");
diff -Nur linux-4.2/block/bfq-sched.c linux-4.2-pck/block/bfq-sched.c
--- linux-4.2/block/bfq-sched.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/bfq-sched.c	2015-09-08 00:48:22.218364355 -0300
@@ -0,0 +1,1180 @@
+/*
+ * BFQ: Hierarchical B-WF2Q+ scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ */
+
+#ifdef CONFIG_CGROUP_BFQIO
+#define for_each_entity(entity)	\
+	for (; entity != NULL; entity = entity->parent)
+
+#define for_each_entity_safe(entity, parent) \
+	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 int extract,
+						 struct bfq_data *bfqd);
+
+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
+{
+	struct bfq_entity *bfqg_entity;
+	struct bfq_group *bfqg;
+	struct bfq_sched_data *group_sd;
+
+	BUG_ON(next_in_service == NULL);
+
+	group_sd = next_in_service->sched_data;
+
+	bfqg = container_of(group_sd, struct bfq_group, sched_data);
+	/*
+	 * bfq_group's my_entity field is not NULL only if the group
+	 * is not the root group. We must not touch the root entity
+	 * as it must never become an in-service entity.
+	 */
+	bfqg_entity = bfqg->my_entity;
+	if (bfqg_entity != NULL)
+		bfqg_entity->budget = next_in_service->budget;
+}
+
+static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+{
+	struct bfq_entity *next_in_service;
+
+	if (sd->in_service_entity != NULL)
+		/* will update/requeue at the end of service */
+		return 0;
+
+	/*
+	 * NOTE: this can be improved in many ways, such as returning
+	 * 1 (and thus propagating upwards the update) only when the
+	 * budget changes, or caching the bfqq that will be scheduled
+	 * next from this subtree.  For now we worry more about
+	 * correctness than about performance...
+	 */
+	next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
+	sd->next_in_service = next_in_service;
+
+	if (next_in_service != NULL)
+		bfq_update_budget(next_in_service);
+
+	return 1;
+}
+
+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
+					     struct bfq_entity *entity)
+{
+	BUG_ON(sd->next_in_service != entity);
+}
+#else
+#define for_each_entity(entity)	\
+	for (; entity != NULL; entity = NULL)
+
+#define for_each_entity_safe(entity, parent) \
+	for (parent = NULL; entity != NULL; entity = parent)
+
+static inline int bfq_update_next_in_service(struct bfq_sched_data *sd)
+{
+	return 0;
+}
+
+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
+					     struct bfq_entity *entity)
+{
+}
+
+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
+{
+}
+#endif
+
+/*
+ * Shift for timestamp calculations.  This actually limits the maximum
+ * service allowed in one timestamp delta (small shift values increase it),
+ * the maximum total weight that can be used for the queues in the system
+ * (big shift values increase it), and the period of virtual time
+ * wraparounds.
+ */
+#define WFQ_SERVICE_SHIFT	22
+
+/**
+ * bfq_gt - compare two timestamps.
+ * @a: first ts.
+ * @b: second ts.
+ *
+ * Return @a > @b, dealing with wrapping correctly.
+ */
+static inline int bfq_gt(u64 a, u64 b)
+{
+	return (s64)(a - b) > 0;
+}
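
For illustration only (not part of the patch): a standalone, user-space
sketch of the same wraparound-safe comparison used by bfq_gt() above; the
helper name and the numbers are hypothetical.

	#include <stdint.h>
	#include <stdio.h>

	/* Same trick as bfq_gt(): a signed view of the unsigned difference
	 * keeps ordering correct across a wraparound of the virtual clock. */
	static int ts_after(uint64_t a, uint64_t b)
	{
		return (int64_t)(a - b) > 0;
	}

	int main(void)
	{
		uint64_t near_wrap = UINT64_MAX - 5;

		/* Ten "ticks" later the counter has wrapped around to 4, yet
		 * it still compares as coming after near_wrap: prints 1. */
		printf("%d\n", ts_after(near_wrap + 10, near_wrap));
		return 0;
	}
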
+
+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = NULL;
+
+	BUG_ON(entity == NULL);
+
+	if (entity->my_sched_data == NULL)
+		bfqq = container_of(entity, struct bfq_queue, entity);
+
+	return bfqq;
+}
+
+
+/**
+ * bfq_delta - map service into the virtual time domain.
+ * @service: amount of service.
+ * @weight: scale factor (weight of an entity or weight sum).
+ */
+static inline u64 bfq_delta(unsigned long service,
+					unsigned long weight)
+{
+	u64 d = (u64)service << WFQ_SERVICE_SHIFT;
+
+	do_div(d, weight);
+	return d;
+}
+
+/**
+ * bfq_calc_finish - assign the finish time to an entity.
+ * @entity: the entity to act upon.
+ * @service: the service to be charged to the entity.
+ */
+static inline void bfq_calc_finish(struct bfq_entity *entity,
+				   unsigned long service)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	BUG_ON(entity->weight == 0);
+
+	entity->finish = entity->start +
+		bfq_delta(service, entity->weight);
+
+	if (bfqq != NULL) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			"calc_finish: serv %lu, w %d",
+			service, entity->weight);
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			"calc_finish: start %llu, finish %llu, delta %llu",
+			entity->start, entity->finish,
+			bfq_delta(service, entity->weight));
+	}
+}
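
The two helpers above implement the B-WF2Q+ timestamp rule
F = S + (service << WFQ_SERVICE_SHIFT) / weight. Below is a minimal,
standalone sketch of that arithmetic; the values and the helper name are
illustrative only, not part of the patch.

	#include <stdint.h>
	#include <stdio.h>

	#define WFQ_SERVICE_SHIFT	22

	/* Standalone restatement of bfq_delta(): map an amount of service
	 * into the virtual-time domain, scaled down by the entity weight. */
	static uint64_t virt_delta(unsigned long service, unsigned long weight)
	{
		return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
	}

	int main(void)
	{
		uint64_t start = 0;
		unsigned long service = 8;	/* sectors served, arbitrary */

		/* Doubling the weight halves the virtual-time cost of the
		 * same service, so heavier entities finish "earlier". */
		printf("finish(w=100) = %llu\n",
		       (unsigned long long)(start + virt_delta(service, 100)));
		printf("finish(w=200) = %llu\n",
		       (unsigned long long)(start + virt_delta(service, 200)));
		return 0;
	}
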
+
+/**
+ * bfq_entity_of - get an entity from a node.
+ * @node: the node field of the entity.
+ *
+ * Convert a node pointer to the corresponding entity.  This is used only
+ * to simplify the logic of some functions and not as the generic
+ * conversion mechanism because, e.g., in the tree walking functions,
+ * the check for a %NULL value would be redundant.
+ */
+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
+{
+	struct bfq_entity *entity = NULL;
+
+	if (node != NULL)
+		entity = rb_entry(node, struct bfq_entity, rb_node);
+
+	return entity;
+}
+
+/**
+ * bfq_extract - remove an entity from a tree.
+ * @root: the tree root.
+ * @entity: the entity to remove.
+ */
+static inline void bfq_extract(struct rb_root *root,
+			       struct bfq_entity *entity)
+{
+	BUG_ON(entity->tree != root);
+
+	entity->tree = NULL;
+	rb_erase(&entity->rb_node, root);
+}
+
+/**
+ * bfq_idle_extract - extract an entity from the idle tree.
+ * @st: the service tree of the owning @entity.
+ * @entity: the entity being removed.
+ */
+static void bfq_idle_extract(struct bfq_service_tree *st,
+			     struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *next;
+
+	BUG_ON(entity->tree != &st->idle);
+
+	if (entity == st->first_idle) {
+		next = rb_next(&entity->rb_node);
+		st->first_idle = bfq_entity_of(next);
+	}
+
+	if (entity == st->last_idle) {
+		next = rb_prev(&entity->rb_node);
+		st->last_idle = bfq_entity_of(next);
+	}
+
+	bfq_extract(&st->idle, entity);
+
+	if (bfqq != NULL)
+		list_del(&bfqq->bfqq_list);
+}
+
+/**
+ * bfq_insert - generic tree insertion.
+ * @root: tree root.
+ * @entity: entity to insert.
+ *
+ * This is used for the idle and the active tree, since they are both
+ * ordered by finish time.
+ */
+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
+{
+	struct bfq_entity *entry;
+	struct rb_node **node = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	BUG_ON(entity->tree != NULL);
+
+	while (*node != NULL) {
+		parent = *node;
+		entry = rb_entry(parent, struct bfq_entity, rb_node);
+
+		if (bfq_gt(entry->finish, entity->finish))
+			node = &parent->rb_left;
+		else
+			node = &parent->rb_right;
+	}
+
+	rb_link_node(&entity->rb_node, parent, node);
+	rb_insert_color(&entity->rb_node, root);
+
+	entity->tree = root;
+}
+
+/**
+ * bfq_update_min - update the min_start field of an entity.
+ * @entity: the entity to update.
+ * @node: one of its children.
+ *
+ * This function is called when @entity may store an invalid value for
+ * min_start due to updates to the active tree.  The function assumes
+ * that the subtree rooted at @node (which may be its left or its right
+ * child) has a valid min_start value.
+ */
+static inline void bfq_update_min(struct bfq_entity *entity,
+				  struct rb_node *node)
+{
+	struct bfq_entity *child;
+
+	if (node != NULL) {
+		child = rb_entry(node, struct bfq_entity, rb_node);
+		if (bfq_gt(entity->min_start, child->min_start))
+			entity->min_start = child->min_start;
+	}
+}
+
+/**
+ * bfq_update_active_node - recalculate min_start.
+ * @node: the node to update.
+ *
+ * @node may have changed position or one of its children may have moved,
+ * this function updates its min_start value.  The left and right subtrees
+ * are assumed to hold a correct min_start value.
+ */
+static inline void bfq_update_active_node(struct rb_node *node)
+{
+	struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
+
+	entity->min_start = entity->start;
+	bfq_update_min(entity, node->rb_right);
+	bfq_update_min(entity, node->rb_left);
+}
+
+/**
+ * bfq_update_active_tree - update min_start for the whole active tree.
+ * @node: the starting node.
+ *
+ * @node must be the deepest modified node after an update.  This function
+ * updates its min_start using the values held by its children, assuming
+ * that they did not change, and then updates all the nodes that may have
+ * changed in the path to the root.  The only nodes that may have changed
+ * are the ones in the path or their siblings.
+ */
+static void bfq_update_active_tree(struct rb_node *node)
+{
+	struct rb_node *parent;
+
+up:
+	bfq_update_active_node(node);
+
+	parent = rb_parent(node);
+	if (parent == NULL)
+		return;
+
+	if (node == parent->rb_left && parent->rb_right != NULL)
+		bfq_update_active_node(parent->rb_right);
+	else if (parent->rb_left != NULL)
+		bfq_update_active_node(parent->rb_left);
+
+	node = parent;
+	goto up;
+}
+
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root);
+
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root);
+
+
+/**
+ * bfq_active_insert - insert an entity in the active tree of its
+ *                     group/device.
+ * @st: the service tree of the entity.
+ * @entity: the entity being inserted.
+ *
+ * The active tree is ordered by finish time, but an extra key is kept
+ * for each node, containing the minimum value for the start times of
+ * its children (and the node itself), so it's possible to search for
+ * the eligible node with the lowest finish time in logarithmic time.
+ */
+static void bfq_active_insert(struct bfq_service_tree *st,
+			      struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *node = &entity->rb_node;
+#ifdef CONFIG_CGROUP_BFQIO
+	struct bfq_sched_data *sd = NULL;
+	struct bfq_group *bfqg = NULL;
+	struct bfq_data *bfqd = NULL;
+#endif
+
+	bfq_insert(&st->active, entity);
+
+	if (node->rb_left != NULL)
+		node = node->rb_left;
+	else if (node->rb_right != NULL)
+		node = node->rb_right;
+
+	bfq_update_active_tree(node);
+
+#ifdef CONFIG_CGROUP_BFQIO
+	sd = entity->sched_data;
+	bfqg = container_of(sd, struct bfq_group, sched_data);
+	BUG_ON(!bfqg);
+	bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+	if (bfqq != NULL)
+		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
+#ifdef CONFIG_CGROUP_BFQIO
+	else { /* bfq_group */
+		BUG_ON(!bfqd);
+		bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
+	}
+	if (bfqg != bfqd->root_group) {
+		BUG_ON(!bfqg);
+		BUG_ON(!bfqd);
+		bfqg->active_entities++;
+		if (bfqg->active_entities == 2)
+			bfqd->active_numerous_groups++;
+	}
+#endif
+}
+
+/**
+ * bfq_ioprio_to_weight - calc a weight from an ioprio.
+ * @ioprio: the ioprio value to convert.
+ */
+static inline unsigned short bfq_ioprio_to_weight(int ioprio)
+{
+	BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
+	return IOPRIO_BE_NR - ioprio;
+}
+
+/**
+ * bfq_weight_to_ioprio - calc an ioprio from a weight.
+ * @weight: the weight value to convert.
+ *
+ * To preserve as much as possible the old only-ioprio user interface,
+ * 0 is used as an escape ioprio value for weights (numerically) equal to
+ * or larger than IOPRIO_BE_NR.
+ */
+static inline unsigned short bfq_weight_to_ioprio(int weight)
+{
+	BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
+	return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
+}
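
For reference, a small standalone sketch of the mapping implemented by the
two helpers above, assuming IOPRIO_BE_NR is 8 as in mainline; the program
below is illustrative and not part of the patch.

	#include <stdio.h>

	#define IOPRIO_BE_NR	8	/* assumption: value from ioprio.h */

	static unsigned short ioprio_to_weight(int ioprio)
	{
		return IOPRIO_BE_NR - ioprio;		/* 0..7 -> 8..1 */
	}

	static unsigned short weight_to_ioprio(int weight)
	{
		/* weights >= IOPRIO_BE_NR fall back to the escape ioprio 0 */
		return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
	}

	int main(void)
	{
		int ioprio;

		for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
			printf("ioprio %d <-> weight %u\n", ioprio,
			       (unsigned)ioprio_to_weight(ioprio));
		printf("weight 100 -> ioprio %u (escape)\n",
		       (unsigned)weight_to_ioprio(100));
		return 0;
	}
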
+
+static inline void bfq_get_entity(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	if (bfqq != NULL) {
+		atomic_inc(&bfqq->ref);
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
+			     bfqq, atomic_read(&bfqq->ref));
+	}
+}
+
+/**
+ * bfq_find_deepest - find the deepest node that an extraction can modify.
+ * @node: the node being removed.
+ *
+ * Do the first step of an extraction in an rb tree, looking for the
+ * node that will replace @node, and returning the deepest node that
+ * the following modifications to the tree can touch.  If @node is the
+ * last node in the tree return %NULL.
+ */
+static struct rb_node *bfq_find_deepest(struct rb_node *node)
+{
+	struct rb_node *deepest;
+
+	if (node->rb_right == NULL && node->rb_left == NULL)
+		deepest = rb_parent(node);
+	else if (node->rb_right == NULL)
+		deepest = node->rb_left;
+	else if (node->rb_left == NULL)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right != NULL)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+	}
+
+	return deepest;
+}
+
+/**
+ * bfq_active_extract - remove an entity from the active tree.
+ * @st: the service_tree containing the tree.
+ * @entity: the entity being removed.
+ */
+static void bfq_active_extract(struct bfq_service_tree *st,
+			       struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *node;
+#ifdef CONFIG_CGROUP_BFQIO
+	struct bfq_sched_data *sd = NULL;
+	struct bfq_group *bfqg = NULL;
+	struct bfq_data *bfqd = NULL;
+#endif
+
+	node = bfq_find_deepest(&entity->rb_node);
+	bfq_extract(&st->active, entity);
+
+	if (node != NULL)
+		bfq_update_active_tree(node);
+
+#ifdef CONFIG_CGROUP_BFQIO
+	sd = entity->sched_data;
+	bfqg = container_of(sd, struct bfq_group, sched_data);
+	BUG_ON(!bfqg);
+	bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+	if (bfqq != NULL)
+		list_del(&bfqq->bfqq_list);
+#ifdef CONFIG_CGROUP_BFQIO
+	else { /* bfq_group */
+		BUG_ON(!bfqd);
+		bfq_weights_tree_remove(bfqd, entity,
+					&bfqd->group_weights_tree);
+	}
+	if (bfqg != bfqd->root_group) {
+		BUG_ON(!bfqg);
+		BUG_ON(!bfqd);
+		BUG_ON(!bfqg->active_entities);
+		bfqg->active_entities--;
+		if (bfqg->active_entities == 1) {
+			BUG_ON(!bfqd->active_numerous_groups);
+			bfqd->active_numerous_groups--;
+		}
+	}
+#endif
+}
+
+/**
+ * bfq_idle_insert - insert an entity into the idle tree.
+ * @st: the service tree containing the tree.
+ * @entity: the entity to insert.
+ */
+static void bfq_idle_insert(struct bfq_service_tree *st,
+			    struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct bfq_entity *first_idle = st->first_idle;
+	struct bfq_entity *last_idle = st->last_idle;
+
+	if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
+		st->first_idle = entity;
+	if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
+		st->last_idle = entity;
+
+	bfq_insert(&st->idle, entity);
+
+	if (bfqq != NULL)
+		list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
+}
+
+/**
+ * bfq_forget_entity - remove an entity from the wfq trees.
+ * @st: the service tree.
+ * @entity: the entity being removed.
+ *
+ * Update the device status and forget everything about @entity, dropping
+ * the device's reference to it, if it is a queue.  Entities belonging to
+ * groups are not refcounted.
+ */
+static void bfq_forget_entity(struct bfq_service_tree *st,
+			      struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct bfq_sched_data *sd;
+
+	BUG_ON(!entity->on_st);
+
+	entity->on_st = 0;
+	st->wsum -= entity->weight;
+	if (bfqq != NULL) {
+		sd = entity->sched_data;
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
+			     bfqq, atomic_read(&bfqq->ref));
+		bfq_put_queue(bfqq);
+	}
+}
+
+/**
+ * bfq_put_idle_entity - release the idle tree ref of an entity.
+ * @st: service tree for the entity.
+ * @entity: the entity being released.
+ */
+static void bfq_put_idle_entity(struct bfq_service_tree *st,
+				struct bfq_entity *entity)
+{
+	bfq_idle_extract(st, entity);
+	bfq_forget_entity(st, entity);
+}
+
+/**
+ * bfq_forget_idle - update the idle tree if necessary.
+ * @st: the service tree to act upon.
+ *
+ * To preserve the global O(log N) complexity we only remove one entry here;
+ * as the idle tree will not grow indefinitely this can be done safely.
+ */
+static void bfq_forget_idle(struct bfq_service_tree *st)
+{
+	struct bfq_entity *first_idle = st->first_idle;
+	struct bfq_entity *last_idle = st->last_idle;
+
+	if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
+	    !bfq_gt(last_idle->finish, st->vtime)) {
+		/*
+		 * Forget the whole idle tree, increasing the vtime past
+		 * the last finish time of idle entities.
+		 */
+		st->vtime = last_idle->finish;
+	}
+
+	if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
+		bfq_put_idle_entity(st, first_idle);
+}
+
+static struct bfq_service_tree *
+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+			 struct bfq_entity *entity)
+{
+	struct bfq_service_tree *new_st = old_st;
+
+	if (entity->ioprio_changed) {
+		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+		unsigned short prev_weight, new_weight;
+		struct bfq_data *bfqd = NULL;
+		struct rb_root *root;
+#ifdef CONFIG_CGROUP_BFQIO
+		struct bfq_sched_data *sd;
+		struct bfq_group *bfqg;
+#endif
+
+		if (bfqq != NULL)
+			bfqd = bfqq->bfqd;
+#ifdef CONFIG_CGROUP_BFQIO
+		else {
+			sd = entity->my_sched_data;
+			bfqg = container_of(sd, struct bfq_group, sched_data);
+			BUG_ON(!bfqg);
+			bfqd = (struct bfq_data *)bfqg->bfqd;
+			BUG_ON(!bfqd);
+		}
+#endif
+
+		BUG_ON(old_st->wsum < entity->weight);
+		old_st->wsum -= entity->weight;
+
+		if (entity->new_weight != entity->orig_weight) {
+			if (entity->new_weight < BFQ_MIN_WEIGHT ||
+			    entity->new_weight > BFQ_MAX_WEIGHT) {
+				printk(KERN_CRIT "update_weight_prio: "
+						 "new_weight %d\n",
+					entity->new_weight);
+				BUG();
+			}
+			entity->orig_weight = entity->new_weight;
+			entity->ioprio =
+				bfq_weight_to_ioprio(entity->orig_weight);
+		}
+
+		entity->ioprio_class = entity->new_ioprio_class;
+		entity->ioprio_changed = 0;
+
+		/*
+		 * NOTE: here we may be changing the weight too early;
+		 * this will cause unfairness.  The correct approach
+		 * would have required additional complexity to defer
+		 * weight changes to the proper time instants (i.e.,
+		 * when entity->finish <= old_st->vtime).
+		 */
+		new_st = bfq_entity_service_tree(entity);
+
+		prev_weight = entity->weight;
+		new_weight = entity->orig_weight *
+			     (bfqq != NULL ? bfqq->wr_coeff : 1);
+		/*
+		 * If the weight of the entity changes, remove the entity
+		 * from its old weight counter (if there is a counter
+		 * associated with the entity), and add it to the counter
+		 * associated with its new weight.
+		 */
+		if (prev_weight != new_weight) {
+			root = bfqq ? &bfqd->queue_weights_tree :
+				      &bfqd->group_weights_tree;
+			bfq_weights_tree_remove(bfqd, entity, root);
+		}
+		entity->weight = new_weight;
+		/*
+		 * Add the entity to its weights tree only if it is
+		 * not associated with a weight-raised queue.
+		 */
+		if (prev_weight != new_weight &&
+		    (bfqq ? bfqq->wr_coeff == 1 : 1))
+			/* If we get here, root has been initialized. */
+			bfq_weights_tree_add(bfqd, entity, root);
+
+		new_st->wsum += entity->weight;
+
+		if (new_st != old_st)
+			entity->start = new_st->vtime;
+	}
+
+	return new_st;
+}
+
+/**
+ * bfq_bfqq_served - update the scheduler status after selection for
+ *                   service.
+ * @bfqq: the queue being served.
+ * @served: bytes to transfer.
+ *
+ * NOTE: this can be optimized, as the timestamps of upper level entities
+ * are synchronized every time a new bfqq is selected for service.  For now,
+ * we keep it this way to better check consistency.
+ */
+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st;
+
+	for_each_entity(entity) {
+		st = bfq_entity_service_tree(entity);
+
+		entity->service += served;
+		BUG_ON(entity->service > entity->budget);
+		BUG_ON(st->wsum == 0);
+
+		st->vtime += bfq_delta(served, st->wsum);
+		bfq_forget_idle(st);
+	}
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
+}
+
+/**
+ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
+ * @bfqq: the queue that needs a service update.
+ *
+ * When it's not possible to be fair in the service domain, because
+ * a queue is not consuming its budget fast enough (the meaning of
+ * fast depends on the timeout parameter), we charge it a full
+ * budget.  In this way we should obtain a sort of time-domain
+ * fairness among all the seeky/slow queues.
+ */
+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
+
+	bfq_bfqq_served(bfqq, entity->budget - entity->service);
+}
+
+/**
+ * __bfq_activate_entity - activate an entity.
+ * @entity: the entity being activated.
+ *
+ * Called whenever an entity is activated, i.e., it is not active and one
+ * of its children receives a new request, or has to be reactivated due to
+ * budget exhaustion.  It uses the current budget of the entity (and the
+ * service received if @entity is active) of the queue to calculate its
+ * timestamps.
+ */
+static void __bfq_activate_entity(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	if (entity == sd->in_service_entity) {
+		BUG_ON(entity->tree != NULL);
+		/*
+		 * If we are requeueing the current entity we have
+		 * to take care of not charging to it service it has
+		 * not received.
+		 */
+		bfq_calc_finish(entity, entity->service);
+		entity->start = entity->finish;
+		sd->in_service_entity = NULL;
+	} else if (entity->tree == &st->active) {
+		/*
+		 * Requeueing an entity due to a change of some
+		 * next_in_service entity below it.  We reuse the
+		 * old start time.
+		 */
+		bfq_active_extract(st, entity);
+	} else if (entity->tree == &st->idle) {
+		/*
+		 * Must be on the idle tree, bfq_idle_extract() will
+		 * check for that.
+		 */
+		bfq_idle_extract(st, entity);
+		entity->start = bfq_gt(st->vtime, entity->finish) ?
+				       st->vtime : entity->finish;
+	} else {
+		/*
+		 * The finish time of the entity may be invalid, and
+		 * it is in the past for sure, otherwise the queue
+		 * would have been on the idle tree.
+		 */
+		entity->start = st->vtime;
+		st->wsum += entity->weight;
+		bfq_get_entity(entity);
+
+		BUG_ON(entity->on_st);
+		entity->on_st = 1;
+	}
+
+	st = __bfq_entity_update_weight_prio(st, entity);
+	bfq_calc_finish(entity, entity->budget);
+	bfq_active_insert(st, entity);
+}
+
+/**
+ * bfq_activate_entity - activate an entity and its ancestors if necessary.
+ * @entity: the entity to activate.
+ *
+ * Activate @entity and all the entities on the path from it to the root.
+ */
+static void bfq_activate_entity(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sd;
+
+	for_each_entity(entity) {
+		__bfq_activate_entity(entity);
+
+		sd = entity->sched_data;
+		if (!bfq_update_next_in_service(sd))
+			/*
+			 * No need to propagate the activation to the
+			 * upper entities, as they will be updated when
+			 * the in-service entity is rescheduled.
+			 */
+			break;
+	}
+}
+
+/**
+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
+ * @entity: the entity to deactivate.
+ * @requeue: if false, the entity will not be put into the idle tree.
+ *
+ * Deactivate an entity, independently from its previous state.  If the
+ * entity was not on a service tree just return, otherwise if it is on
+ * any scheduler tree, extract it from that tree, and if necessary
+ * and if the caller did not specify @requeue, put it on the idle tree.
+ *
+ * Return %1 if the caller should update the entity hierarchy, i.e.,
+ * if the entity was in service or if it was the next_in_service for
+ * its sched_data; return %0 otherwise.
+ */
+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	int was_in_service = entity == sd->in_service_entity;
+	int ret = 0;
+
+	if (!entity->on_st)
+		return 0;
+
+	BUG_ON(was_in_service && entity->tree != NULL);
+
+	if (was_in_service) {
+		bfq_calc_finish(entity, entity->service);
+		sd->in_service_entity = NULL;
+	} else if (entity->tree == &st->active)
+		bfq_active_extract(st, entity);
+	else if (entity->tree == &st->idle)
+		bfq_idle_extract(st, entity);
+	else if (entity->tree != NULL)
+		BUG();
+
+	if (was_in_service || sd->next_in_service == entity)
+		ret = bfq_update_next_in_service(sd);
+
+	if (!requeue || !bfq_gt(entity->finish, st->vtime))
+		bfq_forget_entity(st, entity);
+	else
+		bfq_idle_insert(st, entity);
+
+	BUG_ON(sd->in_service_entity == entity);
+	BUG_ON(sd->next_in_service == entity);
+
+	return ret;
+}
+
+/**
+ * bfq_deactivate_entity - deactivate an entity.
+ * @entity: the entity to deactivate.
+ * @requeue: true if the entity can be put on the idle tree
+ */
+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+{
+	struct bfq_sched_data *sd;
+	struct bfq_entity *parent;
+
+	for_each_entity_safe(entity, parent) {
+		sd = entity->sched_data;
+
+		if (!__bfq_deactivate_entity(entity, requeue))
+			/*
+			 * The parent entity is still backlogged, and
+			 * we don't need to update it as it is still
+			 * in service.
+			 */
+			break;
+
+		if (sd->next_in_service != NULL)
+			/*
+			 * The parent entity is still backlogged and
+			 * the budgets on the path towards the root
+			 * need to be updated.
+			 */
+			goto update;
+
+		/*
+		 * If we reach this point, the parent is no longer
+		 * backlogged and we want to propagate the dequeue upwards.
+		 */
+		requeue = 1;
+	}
+
+	return;
+
+update:
+	entity = parent;
+	for_each_entity(entity) {
+		__bfq_activate_entity(entity);
+
+		sd = entity->sched_data;
+		if (!bfq_update_next_in_service(sd))
+			break;
+	}
+}
+
+/**
+ * bfq_update_vtime - update vtime if necessary.
+ * @st: the service tree to act upon.
+ *
+ * If necessary update the service tree vtime to have at least one
+ * eligible entity, skipping to its start time.  Assumes that the
+ * active tree of the device is not empty.
+ *
+ * NOTE: this hierarchical implementation updates vtimes quite often, so
+ * we may end up with reactivated processes getting timestamps after a
+ * vtime skip done because we needed a ->first_active entity on some
+ * intermediate node.
+ */
+static void bfq_update_vtime(struct bfq_service_tree *st)
+{
+	struct bfq_entity *entry;
+	struct rb_node *node = st->active.rb_node;
+
+	entry = rb_entry(node, struct bfq_entity, rb_node);
+	if (bfq_gt(entry->min_start, st->vtime)) {
+		st->vtime = entry->min_start;
+		bfq_forget_idle(st);
+	}
+}
+
+/**
+ * bfq_first_active_entity - find the eligible entity with
+ *                           the smallest finish time
+ * @st: the service tree to select from.
+ *
+ * This function searches the first schedulable entity, starting from the
+ * root of the tree and descending into the left subtree whenever it
+ * contains at least one eligible (start <= vtime) entity. The path on
+ * the right is followed only if a) the left subtree contains no eligible
+ * entities and b) no eligible entity has been found yet.
+ */
+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+{
+	struct bfq_entity *entry, *first = NULL;
+	struct rb_node *node = st->active.rb_node;
+
+	while (node != NULL) {
+		entry = rb_entry(node, struct bfq_entity, rb_node);
+left:
+		if (!bfq_gt(entry->start, st->vtime))
+			first = entry;
+
+		BUG_ON(bfq_gt(entry->min_start, st->vtime));
+
+		if (node->rb_left != NULL) {
+			entry = rb_entry(node->rb_left,
+					 struct bfq_entity, rb_node);
+			if (!bfq_gt(entry->min_start, st->vtime)) {
+				node = node->rb_left;
+				goto left;
+			}
+		}
+		if (first != NULL)
+			break;
+		node = node->rb_right;
+	}
+
+	BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
+	return first;
+}
+
+/**
+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
+ * @st: the service tree.
+ *
+ * Update the virtual time in @st and return the first eligible entity
+ * it contains.
+ */
+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+						   bool force)
+{
+	struct bfq_entity *entity, *new_next_in_service = NULL;
+
+	if (RB_EMPTY_ROOT(&st->active))
+		return NULL;
+
+	bfq_update_vtime(st);
+	entity = bfq_first_active_entity(st);
+	BUG_ON(bfq_gt(entity->start, st->vtime));
+
+	/*
+	 * If the chosen entity does not match the sched_data's
+	 * next_in_service and we are forcibly serving the IDLE priority
+	 * class tree, bubble the budget update up the hierarchy.
+	 */
+	if (unlikely(force && entity != entity->sched_data->next_in_service)) {
+		new_next_in_service = entity;
+		for_each_entity(new_next_in_service)
+			bfq_update_budget(new_next_in_service);
+	}
+
+	return entity;
+}
+
+/**
+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
+ * @sd: the sched_data.
+ * @extract: if true the returned entity will be also extracted from @sd.
+ *
+ * NOTE: since we cache the next_in_service entity at each level of the
+ * hierarchy, the complexity of the lookup can be decreased with
+ * absolutely no effort by just returning the cached next_in_service value;
+ * we prefer to do full lookups to test the consistency of the data
+ * structures.
+ */
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 int extract,
+						 struct bfq_data *bfqd)
+{
+	struct bfq_service_tree *st = sd->service_tree;
+	struct bfq_entity *entity;
+	int i = 0;
+
+	BUG_ON(sd->in_service_entity != NULL);
+
+	if (bfqd != NULL &&
+	    jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
+		entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
+						  true);
+		if (entity != NULL) {
+			i = BFQ_IOPRIO_CLASSES - 1;
+			bfqd->bfq_class_idle_last_service = jiffies;
+			sd->next_in_service = entity;
+		}
+	}
+	for (; i < BFQ_IOPRIO_CLASSES; i++) {
+		entity = __bfq_lookup_next_entity(st + i, false);
+		if (entity != NULL) {
+			if (extract) {
+				bfq_check_next_in_service(sd, entity);
+				bfq_active_extract(st + i, entity);
+				sd->in_service_entity = entity;
+				sd->next_in_service = NULL;
+			}
+			break;
+		}
+	}
+
+	return entity;
+}
+
+/*
+ * Get next queue for service.
+ */
+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+{
+	struct bfq_entity *entity = NULL;
+	struct bfq_sched_data *sd;
+	struct bfq_queue *bfqq;
+
+	BUG_ON(bfqd->in_service_queue != NULL);
+
+	if (bfqd->busy_queues == 0)
+		return NULL;
+
+	sd = &bfqd->root_group->sched_data;
+	for (; sd != NULL; sd = entity->my_sched_data) {
+		entity = bfq_lookup_next_entity(sd, 1, bfqd);
+		BUG_ON(entity == NULL);
+		entity->service = 0;
+	}
+
+	bfqq = bfq_entity_to_bfqq(entity);
+	BUG_ON(bfqq == NULL);
+
+	return bfqq;
+}
+
+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+{
+	if (bfqd->in_service_bic != NULL) {
+		put_io_context(bfqd->in_service_bic->icq.ioc);
+		bfqd->in_service_bic = NULL;
+	}
+
+	bfqd->in_service_queue = NULL;
+	del_timer(&bfqd->idle_slice_timer);
+}
+
+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				int requeue)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfqq == bfqd->in_service_queue)
+		__bfq_bfqd_reset_in_service(bfqd);
+
+	bfq_deactivate_entity(entity, requeue);
+}
+
+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	bfq_activate_entity(entity);
+}
+
+/*
+ * Called when the bfqq no longer has requests pending, remove it from
+ * the service tree.
+ */
+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			      int requeue)
+{
+	BUG_ON(!bfq_bfqq_busy(bfqq));
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+	bfq_clear_bfqq_busy(bfqq);
+
+	BUG_ON(bfqd->busy_queues == 0);
+	bfqd->busy_queues--;
+
+	if (!bfqq->dispatched) {
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+		if (!blk_queue_nonrot(bfqd->queue)) {
+			BUG_ON(!bfqd->busy_in_flight_queues);
+			bfqd->busy_in_flight_queues--;
+			if (bfq_bfqq_constantly_seeky(bfqq)) {
+				BUG_ON(!bfqd->
+					const_seeky_busy_in_flight_queues);
+				bfqd->const_seeky_busy_in_flight_queues--;
+			}
+		}
+	}
+	if (bfqq->wr_coeff > 1)
+		bfqd->wr_busy_queues--;
+
+	bfq_deactivate_bfqq(bfqd, bfqq, requeue);
+}
+
+/*
+ * Called when an inactive queue receives a new request.
+ */
+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	BUG_ON(bfq_bfqq_busy(bfqq));
+	BUG_ON(bfqq == bfqd->in_service_queue);
+
+	bfq_log_bfqq(bfqd, bfqq, "add to busy");
+
+	bfq_activate_bfqq(bfqd, bfqq);
+
+	bfq_mark_bfqq_busy(bfqq);
+	bfqd->busy_queues++;
+
+	if (!bfqq->dispatched) {
+		if (bfqq->wr_coeff == 1)
+			bfq_weights_tree_add(bfqd, &bfqq->entity,
+					     &bfqd->queue_weights_tree);
+		if (!blk_queue_nonrot(bfqd->queue)) {
+			bfqd->busy_in_flight_queues++;
+			if (bfq_bfqq_constantly_seeky(bfqq))
+				bfqd->const_seeky_busy_in_flight_queues++;
+		}
+	}
+	if (bfqq->wr_coeff > 1)
+		bfqd->wr_busy_queues++;
+}
diff -Nur linux-4.2/block/bfq.h linux-4.2-pck/block/bfq.h
--- linux-4.2/block/bfq.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/bfq.h	2015-09-08 00:48:22.218364355 -0300
@@ -0,0 +1,807 @@
+/*
+ * BFQ-v7r8 for 4.1.0: data structures and common function prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ */
+
+#ifndef _BFQ_H
+#define _BFQ_H
+
+#include <linux/blktrace_api.h>
+#include <linux/hrtimer.h>
+#include <linux/ioprio.h>
+#include <linux/rbtree.h>
+
+#define BFQ_IOPRIO_CLASSES	3
+#define BFQ_CL_IDLE_TIMEOUT	(HZ/5)
+
+#define BFQ_MIN_WEIGHT	1
+#define BFQ_MAX_WEIGHT	1000
+
+#define BFQ_DEFAULT_QUEUE_IOPRIO	4
+
+#define BFQ_DEFAULT_GRP_WEIGHT	10
+#define BFQ_DEFAULT_GRP_IOPRIO	0
+#define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE
+
+struct bfq_entity;
+
+/**
+ * struct bfq_service_tree - per ioprio_class service tree.
+ * @active: tree for active entities (i.e., those backlogged).
+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
+ * @first_idle: idle entity with minimum F_i.
+ * @last_idle: idle entity with maximum F_i.
+ * @vtime: scheduler virtual time.
+ * @wsum: scheduler weight sum; active and idle entities contribute to it.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own.  Each
+ * ioprio_class has its own independent scheduler, and so its own
+ * bfq_service_tree.  All the fields are protected by the queue lock
+ * of the containing bfqd.
+ */
+struct bfq_service_tree {
+	struct rb_root active;
+	struct rb_root idle;
+
+	struct bfq_entity *first_idle;
+	struct bfq_entity *last_idle;
+
+	u64 vtime;
+	unsigned long wsum;
+};
+
+/**
+ * struct bfq_sched_data - multi-class scheduler.
+ * @in_service_entity: entity in service.
+ * @next_in_service: head-of-the-line entity in the scheduler.
+ * @service_tree: array of service trees, one per ioprio_class.
+ *
+ * bfq_sched_data is the basic scheduler queue.  It supports three
+ * ioprio_classes, and can be used either as a toplevel queue or as
+ * an intermediate queue on a hierarchical setup.
+ * @next_in_service points to the active entity of the sched_data
+ * service trees that will be scheduled next.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+ * Requests from higher priority queues are served before all the
+ * requests from lower priority queues; among requests of the same
+ * queue requests are served according to B-WF2Q+.
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_sched_data {
+	struct bfq_entity *in_service_entity;
+	struct bfq_entity *next_in_service;
+	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+};
+
+/**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ *                             with a given weight.
+ * @weight: weight of the entities that this counter refers to.
+ * @num_active: number of active entities with this weight.
+ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
+ *                and @group_weights_tree).
+ */
+struct bfq_weight_counter {
+	short int weight;
+	unsigned int num_active;
+	struct rb_node weights_node;
+};
+
+/**
+ * struct bfq_entity - schedulable entity.
+ * @rb_node: service_tree member.
+ * @weight_counter: pointer to the weight counter associated with this entity.
+ * @on_st: flag, true if the entity is on a tree (either the active or
+ *         the idle one of its service_tree).
+ * @finish: B-WF2Q+ finish timestamp (aka F_i).
+ * @start: B-WF2Q+ start timestamp (aka S_i).
+ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
+ * @min_start: minimum start time of the (active) subtree rooted at
+ *             this entity; used for O(log N) lookups into active trees.
+ * @service: service received during the last round of service.
+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
+ * @weight: weight of the queue
+ * @parent: parent entity, for hierarchical scheduling.
+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
+ *                 associated scheduler queue, %NULL on leaf nodes.
+ * @sched_data: the scheduler queue this entity belongs to.
+ * @ioprio: the ioprio in use.
+ * @new_weight: when a weight change is requested, the new weight value.
+ * @orig_weight: original weight, used to implement weight boosting
+ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
+ * @ioprio_class: the ioprio_class in use.
+ * @new_ioprio_class: when an ioprio_class change is requested, the new
+ *                    ioprio_class value.
+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or
+ *                  ioprio_class change.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler.  Each
+ * entity belongs to the sched_data of the parent group in the cgroup
+ * hierarchy.  Non-leaf entities have also their own sched_data, stored
+ * in @my_sched_data.
+ *
+ * Each entity stores independently its priority values; this would
+ * allow different weights on different devices, but this
+ * functionality is not exported to userspace for now.  Priorities and
+ * weights are updated lazily, first storing the new values into the
+ * new_* fields, then setting the @ioprio_changed flag.  As soon as
+ * there is a transition in the entity state that allows the priority
+ * update to take place the effective and the requested priority
+ * values are synchronized.
+ *
+ * Unless cgroups are used, the weight value is calculated from the
+ * ioprio to export the same interface as CFQ.  When dealing with
+ * ``well-behaved'' queues (i.e., queues that do not spend too much
+ * time to consume their budget and have true sequential behavior, and
+ * when there are no external factors breaking anticipation) the
+ * relative weights at each level of the cgroups hierarchy should be
+ * guaranteed.  All the fields are protected by the queue lock of the
+ * containing bfqd.
+ */
+struct bfq_entity {
+	struct rb_node rb_node;
+	struct bfq_weight_counter *weight_counter;
+
+	int on_st;
+
+	u64 finish;
+	u64 start;
+
+	struct rb_root *tree;
+
+	u64 min_start;
+
+	unsigned long service, budget;
+	unsigned short weight, new_weight;
+	unsigned short orig_weight;
+
+	struct bfq_entity *parent;
+
+	struct bfq_sched_data *my_sched_data;
+	struct bfq_sched_data *sched_data;
+
+	unsigned short ioprio, new_ioprio;
+	unsigned short ioprio_class, new_ioprio_class;
+
+	int ioprio_changed;
+};
+
+struct bfq_group;
+
+/**
+ * struct bfq_queue - leaf schedulable entity.
+ * @ref: reference counter.
+ * @bfqd: parent bfq_data.
+ * @new_bfqq: shared bfq_queue if queue is cooperating with
+ *           one or more other queues.
+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
+ * @sort_list: sorted list of pending requests.
+ * @next_rq: if fifo isn't expired, next request to serve.
+ * @queued: nr of requests queued in @sort_list.
+ * @allocated: currently allocated requests.
+ * @meta_pending: pending metadata requests.
+ * @fifo: fifo list of requests in sort_list.
+ * @entity: entity representing this queue in the scheduler.
+ * @max_budget: maximum budget allowed from the feedback mechanism.
+ * @budget_timeout: budget expiration (in jiffies).
+ * @dispatched: number of requests on the dispatch list or inside driver.
+ * @flags: status flags.
+ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
+ * @burst_list_node: node for the device's burst list.
+ * @seek_samples: number of seeks sampled
+ * @seek_total: sum of the distances of the seeks sampled
+ * @seek_mean: mean seek distance
+ * @last_request_pos: position of the last request enqueued
+ * @requests_within_timer: number of consecutive pairs of request completion
+ *                         and arrival, such that the queue becomes idle
+ *                         after the completion, but the next request arrives
+ *                         within an idle time slice; used only if the queue's
+ *                         IO_bound has been cleared.
+ * @pid: pid of the process owning the queue, used for logging purposes.
+ * @last_wr_start_finish: start time of the current weight-raising period if
+ *                        the @bfq_queue is being weight-raised, otherwise
+ *                        finish time of the last weight-raising period
+ * @wr_cur_max_time: current max raising time for this queue
+ * @soft_rt_next_start: minimum time instant such that, only if a new
+ *                      request is enqueued after this time instant in an
+ *                      idle @bfq_queue with no outstanding requests, then
+ *                      the task associated with the queue is deemed as
+ *                      soft real-time (see the comments to the function
+ *                      bfq_bfqq_softrt_next_start())
+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from
+ *                      idle to backlogged
+ * @service_from_backlogged: cumulative service received from the @bfq_queue
+ *                           since the last transition from idle to
+ *                           backlogged
+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
+ *	 queue is shared
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with one or
+ * more io_contexts, if it is async or shared between cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction).
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_queue {
+	atomic_t ref;
+	struct bfq_data *bfqd;
+
+	/* fields for cooperating queues handling */
+	struct bfq_queue *new_bfqq;
+	struct rb_node pos_node;
+	struct rb_root *pos_root;
+
+	struct rb_root sort_list;
+	struct request *next_rq;
+	int queued[2];
+	int allocated[2];
+	int meta_pending;
+	struct list_head fifo;
+
+	struct bfq_entity entity;
+
+	unsigned long max_budget;
+	unsigned long budget_timeout;
+
+	int dispatched;
+
+	unsigned int flags;
+
+	struct list_head bfqq_list;
+
+	struct hlist_node burst_list_node;
+
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+	sector_t last_request_pos;
+
+	unsigned int requests_within_timer;
+
+	pid_t pid;
+	struct bfq_io_cq *bic;
+
+	/* weight-raising fields */
+	unsigned long wr_cur_max_time;
+	unsigned long soft_rt_next_start;
+	unsigned long last_wr_start_finish;
+	unsigned int wr_coeff;
+	unsigned long last_idle_bklogged;
+	unsigned long service_from_backlogged;
+};
+
+/**
+ * struct bfq_ttime - per process thinktime stats.
+ * @ttime_total: total process thinktime
+ * @ttime_samples: number of thinktime samples
+ * @ttime_mean: average process thinktime
+ */
+struct bfq_ttime {
+	unsigned long last_end_request;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+};
+
+/**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+ * @icq: associated io_cq structure
+ * @bfqq: array of two process queues, the sync and the async
+ * @ttime: associated @bfq_ttime struct
+ * @wr_time_left: snapshot of the time left before weight raising ends
+ *                for the sync queue associated to this process; this
+ *		  snapshot is taken to remember this value while the weight
+ *		  raising is suspended because the queue is merged with a
+ *		  shared queue, and is used to set @raising_cur_max_time
+ *		  when the queue is split from the shared queue and its
+ *		  weight is raised again
+ * @saved_idle_window: same purpose as the previous field for the idle
+ *                     window
+ * @saved_IO_bound: same purpose as the previous two fields for the I/O
+ *                  bound classification of a queue
+ * @saved_in_large_burst: same purpose as the previous fields for the
+ *                        value of the field keeping the queue's belonging
+ *                        to a large burst
+ * @was_in_burst_list: true if the queue belonged to a burst list
+ *                     before its merge with another cooperating queue
+ * @cooperations: counter of consecutive successful queue merges undergone
+ *                by any of the process' @bfq_queues
+ * @failed_cooperations: counter of consecutive failed queue merges of any
+ *                       of the process' @bfq_queues
+ */
+struct bfq_io_cq {
+	struct io_cq icq; /* must be the first member */
+	struct bfq_queue *bfqq[2];
+	struct bfq_ttime ttime;
+	int ioprio;
+
+	unsigned int wr_time_left;
+	bool saved_idle_window;
+	bool saved_IO_bound;
+
+	bool saved_in_large_burst;
+	bool was_in_burst_list;
+
+	unsigned int cooperations;
+	unsigned int failed_cooperations;
+};
+
+enum bfq_device_speed {
+	BFQ_BFQD_FAST,
+	BFQ_BFQD_SLOW,
+};
+
+/**
+ * struct bfq_data - per device data structure.
+ * @queue: request queue for the managed device.
+ * @root_group: root bfq_group for the device.
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ *               determining if two or more queues have interleaving
+ *               requests (see bfq_close_cooperator()).
+ * @active_numerous_groups: number of bfq_groups containing more than one
+ *                          active @bfq_entity.
+ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
+ *                      weight. Used to keep track of whether all @bfq_queues
+ *                     have the same weight. The tree contains one counter
+ *                     for each distinct weight associated to some active
+ *                     and not weight-raised @bfq_queue (see the comments to
+ *                      the functions bfq_weights_tree_[add|remove] for
+ *                     further details).
+ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted
+ *                      by weight. Used to keep track of whether all
+ *                     @bfq_groups have the same weight. The tree contains
+ *                     one counter for each distinct weight associated to
+ *                     some active @bfq_group (see the comments to the
+ *                     functions bfq_weights_tree_[add|remove] for further
+ *                     details).
+ * @busy_queues: number of bfq_queues containing requests (including the
+ *		 queue in service, even if it is idling).
+ * @busy_in_flight_queues: number of @bfq_queues containing pending or
+ *                         in-flight requests, plus the @bfq_queue in
+ *                         service, even if idle but waiting for the
+ *                         possible arrival of its next sync request. This
+ *                         field is updated only if the device is rotational,
+ *                         but used only if the device is also NCQ-capable.
+ *                         The reason why the field is updated also for non-
+ *                         NCQ-capable rotational devices is related to the
+ *                         fact that the value of @hw_tag may be set also
+ *                         later than when busy_in_flight_queues may need to
+ *                         be incremented for the first time(s). Taking also
+ *                         this possibility into account, to avoid unbalanced
+ *                         increments/decrements, would imply more overhead
+ *                         than just updating busy_in_flight_queues
+ *                         regardless of the value of @hw_tag.
+ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues
+ *                                     (that is, seeky queues that expired
+ *                                     for budget timeout at least once)
+ *                                     containing pending or in-flight
+ *                                     requests, including the in-service
+ *                                     @bfq_queue if constantly seeky. This
+ *                                     field is updated only if the device
+ *                                     is rotational, but used only if the
+ *                                     device is also NCQ-capable (see the
+ *                                     comments to @busy_in_flight_queues).
+ * @wr_busy_queues: number of weight-raised busy @bfq_queues.
+ * @queued: number of queued requests.
+ * @rq_in_driver: number of requests dispatched and waiting for completion.
+ * @sync_flight: number of sync requests in the driver.
+ * @max_rq_in_driver: max number of reqs in driver in the last
+ *                    @hw_tag_samples completed requests.
+ * @hw_tag_samples: nr of samples used to calculate hw_tag.
+ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
+ * @budgets_assigned: number of budgets assigned.
+ * @idle_slice_timer: timer set when idling for the next sequential request
+ *                    from the queue in service.
+ * @unplug_work: delayed work to restart dispatching on the request queue.
+ * @in_service_queue: bfq_queue in service.
+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
+ * @last_position: on-disk position of the last served request.
+ * @last_budget_start: beginning of the last budget.
+ * @last_idling_start: beginning of the last idle slice.
+ * @peak_rate: peak transfer rate observed for a budget.
+ * @peak_rate_samples: number of samples used to calculate @peak_rate.
+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before
+ *                  rescheduling.
+ * @group_list: list of all the bfq_groups active on the device.
+ * @active_list: list of all the bfq_queues active on the device.
+ * @idle_list: list of all the bfq_queues idle on the device.
+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
+ *                   requests are served in fifo order.
+ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
+ * @bfq_back_max: maximum allowed backward seek.
+ * @bfq_slice_idle: maximum idling time.
+ * @bfq_user_max_budget: user-configured max budget value
+ *                       (0 for auto-tuning).
+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
+ *                           async queues.
+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
+ *               prevent seeky queues from imposing long latencies on
+ *               well-behaved ones (this also implies that seeky queues cannot
+ *               receive guarantees in the service domain; after a timeout
+ *               they are charged for the whole allocated budget, to try
+ *               to preserve a behavior reasonably fair among them, but
+ *               without service-domain guarantees).
+ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
+ *                   no longer granted any weight-raising.
+ * @bfq_failed_cooperations: number of consecutive failed cooperation
+ *                           chances after which weight-raising is restored
+ *                           to a queue subject to more than bfq_coop_thresh
+ *                           queue merges.
+ * @bfq_requests_within_timer: number of consecutive requests that must be
+ *                             issued within the idle time slice to set
+ *                             again idling to a queue which was marked as
+ *                             non-I/O-bound (see the definition of the
+ *                             IO_bound flag for further details).
+ * @last_ins_in_burst: last time at which a queue entered the current
+ *                     burst of queues being activated shortly after
+ *                     each other; for more details about this and the
+ *                     following parameters related to a burst of
+ *                     activations, see the comments to the function
+ *                     @bfq_handle_burst.
+ * @bfq_burst_interval: reference time interval used to decide whether a
+ *                      queue has been activated shortly after
+ *                      @last_ins_in_burst.
+ * @burst_size: number of queues in the current burst of queue activations.
+ * @bfq_large_burst_thresh: maximum burst size above which the current
+ * 			    queue-activation burst is deemed as 'large'.
+ * @large_burst: true if a large queue-activation burst is in progress.
+ * @burst_list: head of the burst list (as for the above fields, more details
+ * 		in the comments to the function bfq_handle_burst).
+ * @low_latency: if set to true, low-latency heuristics are enabled.
+ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
+ *                queue is multiplied.
+ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
+ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
+ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
+ *			  may be reactivated for a queue (in jiffies).
+ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
+ *				after which weight-raising may be
+ *				reactivated for an already busy queue
+ *				(in jiffies).
+ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue,
+ *			    in sectors per second.
+ * @RT_prod: cached value of the product R*T used for computing the maximum
+ *	     duration of the weight raising automatically.
+ * @device_speed: device-speed class for the low-latency heuristic.
+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
+ *
+ * All the fields are protected by the @queue lock.
+ */
+struct bfq_data {
+	struct request_queue *queue;
+
+	struct bfq_group *root_group;
+	struct rb_root rq_pos_tree;
+
+#ifdef CONFIG_CGROUP_BFQIO
+	int active_numerous_groups;
+#endif
+
+	struct rb_root queue_weights_tree;
+	struct rb_root group_weights_tree;
+
+	int busy_queues;
+	int busy_in_flight_queues;
+	int const_seeky_busy_in_flight_queues;
+	int wr_busy_queues;
+	int queued;
+	int rq_in_driver;
+	int sync_flight;
+
+	int max_rq_in_driver;
+	int hw_tag_samples;
+	int hw_tag;
+
+	int budgets_assigned;
+
+	struct timer_list idle_slice_timer;
+	struct work_struct unplug_work;
+
+	struct bfq_queue *in_service_queue;
+	struct bfq_io_cq *in_service_bic;
+
+	sector_t last_position;
+
+	ktime_t last_budget_start;
+	ktime_t last_idling_start;
+	int peak_rate_samples;
+	u64 peak_rate;
+	unsigned long bfq_max_budget;
+
+	struct hlist_head group_list;
+	struct list_head active_list;
+	struct list_head idle_list;
+
+	unsigned int bfq_fifo_expire[2];
+	unsigned int bfq_back_penalty;
+	unsigned int bfq_back_max;
+	unsigned int bfq_slice_idle;
+	u64 bfq_class_idle_last_service;
+
+	unsigned int bfq_user_max_budget;
+	unsigned int bfq_max_budget_async_rq;
+	unsigned int bfq_timeout[2];
+
+	unsigned int bfq_coop_thresh;
+	unsigned int bfq_failed_cooperations;
+	unsigned int bfq_requests_within_timer;
+
+	unsigned long last_ins_in_burst;
+	unsigned long bfq_burst_interval;
+	int burst_size;
+	unsigned long bfq_large_burst_thresh;
+	bool large_burst;
+	struct hlist_head burst_list;
+
+	bool low_latency;
+
+	/* parameters of the low_latency heuristics */
+	unsigned int bfq_wr_coeff;
+	unsigned int bfq_wr_max_time;
+	unsigned int bfq_wr_rt_max_time;
+	unsigned int bfq_wr_min_idle_time;
+	unsigned long bfq_wr_min_inter_arr_async;
+	unsigned int bfq_wr_max_softrt_rate;
+	u64 RT_prod;
+	enum bfq_device_speed device_speed;
+
+	struct bfq_queue oom_bfqq;
+};
+
+enum bfqq_state_flags {
+	BFQ_BFQQ_FLAG_busy = 0,		/* has requests or is in service */
+	BFQ_BFQQ_FLAG_wait_request,	/* waiting for a request */
+	BFQ_BFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
+	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
+	BFQ_BFQQ_FLAG_idle_window,	/* slice idling enabled */
+	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
+	BFQ_BFQQ_FLAG_budget_new,	/* no completion with this budget */
+	BFQ_BFQQ_FLAG_IO_bound,		/*
+					 * bfqq has timed-out at least once
+					 * having consumed at most 2/10 of
+					 * its budget
+					 */
+	BFQ_BFQQ_FLAG_in_large_burst,	/*
+					 * bfqq activated in a large burst,
+					 * see comments to bfq_handle_burst.
+					 */
+	BFQ_BFQQ_FLAG_constantly_seeky,	/*
+					 * bfqq has proved to be slow and
+					 * seeky until budget timeout
+					 */
+	BFQ_BFQQ_FLAG_softrt_update,	/*
+					 * may need softrt-next-start
+					 * update
+					 */
+	BFQ_BFQQ_FLAG_coop,		/* bfqq is shared */
+	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be split */
+	BFQ_BFQQ_FLAG_just_split,	/* queue has just been split */
+};
+
+#define BFQ_BFQQ_FNS(name)						\
+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)		\
+{									\
+	(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)	\
+{									\
+	(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq)		\
+{									\
+	return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
+}
+
+BFQ_BFQQ_FNS(busy);
+BFQ_BFQQ_FNS(wait_request);
+BFQ_BFQQ_FNS(must_alloc);
+BFQ_BFQQ_FNS(fifo_expire);
+BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(sync);
+BFQ_BFQQ_FNS(budget_new);
+BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
+BFQ_BFQQ_FNS(constantly_seeky);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(just_split);
+BFQ_BFQQ_FNS(softrt_update);
+#undef BFQ_BFQQ_FNS
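
For reference, BFQ_BFQQ_FNS(busy) above expands to roughly the three inline
helpers sketched below (mark, clear and test of a single flag bit); the
expansion is shown only for illustration.

	static inline void bfq_mark_bfqq_busy(struct bfq_queue *bfqq)
	{
		(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_busy);
	}
	static inline void bfq_clear_bfqq_busy(struct bfq_queue *bfqq)
	{
		(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_busy);
	}
	static inline int bfq_bfqq_busy(const struct bfq_queue *bfqq)
	{
		return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_busy)) != 0;
	}

Callers such as bfq_add_bfqq_busy() and bfq_del_bfqq_busy() in bfq-sched.c
use these generated helpers instead of manipulating ->flags directly.
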
+
+/* Logging facilities. */
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
+	blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
+
+#define bfq_log(bfqd, fmt, args...) \
+	blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
+
+/* Expiration reasons. */
+enum bfqq_expiration {
+	BFQ_BFQQ_TOO_IDLE = 0,		/*
+					 * queue has been idling for
+					 * too long
+					 */
+	BFQ_BFQQ_BUDGET_TIMEOUT,	/* budget took too long to be used */
+	BFQ_BFQQ_BUDGET_EXHAUSTED,	/* budget consumed */
+	BFQ_BFQQ_NO_MORE_REQUESTS,	/* the queue has no more requests */
+};
+
+#ifdef CONFIG_CGROUP_BFQIO
+/**
+ * struct bfq_group - per (device, cgroup) data structure.
+ * @entity: schedulable entity to insert into the parent group sched_data.
+ * @sched_data: own sched_data, to contain child entities (they may be
+ *              both bfq_queues and bfq_groups).
+ * @group_node: node to be inserted into the bfqio_cgroup->group_data
+ *              list of the containing cgroup's bfqio_cgroup.
+ * @bfqd_node: node to be inserted into the @bfqd->group_list list
+ *             of the groups active on the same device; used for cleanup.
+ * @bfqd: the bfq_data for the device this group acts upon.
+ * @async_bfqq: array of async queues for all the tasks belonging to
+ *              the group, one queue per ioprio value per ioprio_class,
+ *              except for the idle class that has only one queue.
+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
+ *             to avoid too many special cases during group creation/
+ *             migration.
+ * @active_entities: number of active entities belonging to the group;
+ *                   unused for the root group. Used to know whether there
+ *                   are groups with more than one active @bfq_entity
+ *                   (see the comments to the function
+ *                   bfq_bfqq_must_not_expire()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+ * entities belonging to the group that are acting on the same device.
+ *
+ * Locking works as follows:
+ *    o @group_node is protected by the bfqio_cgroup lock, and is accessed
+ *      via RCU from its readers.
+ *    o @bfqd is protected by the queue lock, RCU is used to access it
+ *      from the readers.
+ *    o All the other fields are protected by the @bfqd queue lock.
+ */
+struct bfq_group {
+	struct bfq_entity entity;
+	struct bfq_sched_data sched_data;
+
+	struct hlist_node group_node;
+	struct hlist_node bfqd_node;
+
+	void *bfqd;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+
+	struct bfq_entity *my_entity;
+
+	int active_entities;
+};
+
+/**
+ * struct bfqio_cgroup - bfq cgroup data structure.
+ * @css: subsystem state for bfq in the containing cgroup.
+ * @online: flag marked when the subsystem is inserted.
+ * @weight: cgroup weight.
+ * @ioprio: cgroup ioprio.
+ * @ioprio_class: cgroup ioprio_class.
+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
+ * @group_data: list containing the bfq_group belonging to this cgroup.
+ *
+ * @group_data is accessed using RCU, with @lock protecting the updates,
+ * @ioprio and @ioprio_class are protected by @lock.
+ */
+struct bfqio_cgroup {
+	struct cgroup_subsys_state css;
+	bool online;
+
+	unsigned short weight, ioprio, ioprio_class;
+
+	spinlock_t lock;
+	struct hlist_head group_data;
+};
+#else
+struct bfq_group {
+	struct bfq_sched_data sched_data;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+};
+#endif
+
+static inline struct bfq_service_tree *
+bfq_entity_service_tree(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sched_data = entity->sched_data;
+	unsigned int idx = entity->ioprio_class - 1;
+
+	BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+	BUG_ON(sched_data == NULL);
+
+	return sched_data->service_tree + idx;
+}
+
+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic,
+					    bool is_sync)
+{
+	return bic->bfqq[is_sync];
+}
+
+static inline void bic_set_bfqq(struct bfq_io_cq *bic,
+				struct bfq_queue *bfqq, bool is_sync)
+{
+	bic->bfqq[is_sync] = bfqq;
+}
+
+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+{
+	return bic->icq.q->elevator->elevator_data;
+}
+
+/**
+ * bfq_get_bfqd_locked - get a lock on a bfqd using an RCU-protected pointer.
+ * @ptr: a pointer to a bfqd.
+ * @flags: storage for the flags to be saved.
+ *
+ * This function allows bfqg->bfqd to be protected by the
+ * queue lock of the bfqd it references; the pointer is dereferenced
+ * under RCU, so the storage for bfqd is assured to be safe as long
+ * as the RCU read side critical section does not end.  After the
+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
+ * sure that no other writer accessed it.  If we raced with a writer,
+ * the function returns NULL, with the queue unlocked, otherwise it
+ * returns the dereferenced pointer, with the queue locked.
+ */
+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
+						   unsigned long *flags)
+{
+	struct bfq_data *bfqd;
+
+	rcu_read_lock();
+	bfqd = rcu_dereference(*(struct bfq_data **)ptr);
+
+	if (bfqd != NULL) {
+		spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
+		if (*ptr == bfqd)
+			goto out;
+		spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+	}
+
+	bfqd = NULL;
+out:
+	rcu_read_unlock();
+	return bfqd;
+}
+
+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
+				       unsigned long *flags)
+{
+	spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+}
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic);
+static void bfq_put_queue(struct bfq_queue *bfqq);
+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bfq_group *bfqg, int is_sync,
+				       struct bfq_io_cq *bic, gfp_t gfp_mask);
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg);
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+#endif /* _BFQ_H */
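The bfq_get_bfqd_locked()/bfq_put_bfqd_unlock() pair above implements an RCU-read-then-lock-then-recheck protocol. A minimal caller sketch, assuming CONFIG_CGROUP_BFQIO (so that struct bfq_group carries the RCU-published ->bfqd pointer); the function name and the work done under the lock are illustrative only:

	static void example_update_group(struct bfq_group *bfqg)
	{
		unsigned long flags;
		struct bfq_data *bfqd;

		bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
		if (!bfqd)
			return;	/* raced with a writer; queue lock not taken */

		/* ... operate on bfqd with bfqd->queue->queue_lock held ... */

		bfq_put_bfqd_unlock(bfqd, &flags);
	}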
diff -Nur linux-4.2/block/blk-core.c linux-4.2-pck/block/blk-core.c
--- linux-4.2/block/blk-core.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/block/blk-core.c	2015-09-08 11:20:31.960370046 -0300
@@ -48,6 +48,8 @@
 
 DEFINE_IDA(blk_queue_ida);
 
+int trap_non_toi_io;
+
 /*
  * For the allocated request tables
  */
@@ -1988,6 +1990,9 @@
 {
 	bio->bi_rw |= rw;
 
+	if (unlikely(trap_non_toi_io))
+		BUG_ON(!(bio->bi_flags & (1 << BIO_TOI)));
+
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
diff -Nur linux-4.2/block/genhd.c linux-4.2-pck/block/genhd.c
--- linux-4.2/block/genhd.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/block/genhd.c	2015-09-08 11:20:31.977035898 -0300
@@ -18,6 +18,8 @@
 #include <linux/kobj_map.h>
 #include <linux/mutex.h>
 #include <linux/idr.h>
+#include <linux/ctype.h>
+#include <linux/fs_uuid.h>
 #include <linux/log2.h>
 #include <linux/pm_runtime.h>
 
@@ -1384,6 +1386,85 @@
 
 EXPORT_SYMBOL(invalidate_partition);
 
+dev_t blk_lookup_fs_info(struct fs_info *seek)
+{
+	dev_t devt = MKDEV(0, 0);
+	struct class_dev_iter iter;
+	struct device *dev;
+	int best_score = 0;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while (best_score < 3 && (dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+
+		while (best_score < 3 && (part = disk_part_iter_next(&piter))) {
+			int score = part_matches_fs_info(part, seek);
+			if (score > best_score) {
+				devt = part_devt(part);
+				best_score = score;
+			}
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
+	return devt;
+}
+
+/* The caller passes NULL (plus the key) to start. For each match found, we
+ * return a bdev on which we have done blkdev_get(); we do the blkdev_put()
+ * on bdevs passed back to us. When no more matches are found, we return NULL.
+ */
+struct block_device *next_bdev_of_type(struct block_device *last,
+	const char *key)
+{
+	dev_t devt = MKDEV(0, 0);
+	struct class_dev_iter iter;
+	struct device *dev;
+	struct block_device *next = NULL, *bdev;
+	int got_last = 0;
+
+	if (!key)
+		goto out;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while (!devt && (dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+
+		while ((part = disk_part_iter_next(&piter))) {
+			bdev = bdget(part_devt(part));
+			if (last && !got_last) {
+				if (last == bdev)
+					got_last = 1;
+				continue;
+			}
+
+			if (blkdev_get(bdev, FMODE_READ, 0))
+				continue;
+
+			if (bdev_matches_key(bdev, key)) {
+				next = bdev;
+				break;
+			}
+
+			blkdev_put(bdev, FMODE_READ);
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
+out:
+	if (last)
+		blkdev_put(last, FMODE_READ);
+	return next;
+}
+
 /*
  * Disk events - monitor disk events like media change and eject request.
  */
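The comment above next_bdev_of_type() describes an iterator-style calling convention. A minimal sketch of a caller, assuming the declarations from <linux/fs_uuid.h> and reusing the "swap" key that appears elsewhere in this patch; the function itself is illustrative only:

	static void example_scan_swap_devices(void)
	{
		struct block_device *bdev = NULL;

		/* Start with NULL; each bdev returned has had blkdev_get() done
		 * on it and is put by the next call (or by the final call that
		 * returns NULL), so the loop body must not blkdev_put() it.
		 */
		while ((bdev = next_bdev_of_type(bdev, "swap"))) {
			/* ... inspect bdev here ... */
		}
	}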
diff -Nur linux-4.2/block/uuid.c linux-4.2-pck/block/uuid.c
--- linux-4.2/block/uuid.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/block/uuid.c	2015-09-08 11:20:31.977035898 -0300
@@ -0,0 +1,509 @@
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/fs_uuid.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+static int debug_enabled;
+
+#define PRINTK(fmt, args...) do {					\
+	if (debug_enabled)						\
+		printk(KERN_DEBUG fmt, ## args);			\
+	} while(0)
+
+#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8)			\
+	do {								\
+		if (debug_enabled)					\
+			print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8);	\
+	} while(0)
+
+/*
+ * Simple UUID translation
+ */
+
+struct uuid_info {
+	const char *key;
+	const char *name;
+	long bkoff;
+	unsigned sboff;
+	unsigned sig_len;
+	const char *magic;
+	int uuid_offset;
+	int last_mount_offset;
+	int last_mount_size;
+};
+
+/*
+ * Based on libuuid's blkid_magic array. Note that I don't
+ * have uuid offsets for all of these yet - missing ones are 0x0.
+ * Further information welcome.
+ *
+ * Rearranged by page of fs signature for optimisation.
+ */
+static struct uuid_info uuid_list[] = {
+ { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 },
+ { "ntfs", "ntfs", 0, 3, 8, "NTFS    ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x52, 8, "FAT32   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 8, "FAT16   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 8, "FAT12   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 },
+ { "xfs", "xfs", 0, 0, 4, "XFSB", 0x20, 0, 0 },
+ { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 },
+ { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 },
+ { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 },
+ { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 },
+ { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 },
+ { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 },
+ { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 },
+ { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
+ { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 },
+ { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 },
+ { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
+ { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 },
+ { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 },
+ { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
+ { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
+ { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 },
+ { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 },
+ { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
+ { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
+ { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 },
+ { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 },
+ { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 },
+ { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 },
+ { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 },
+ { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
+ { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
+ { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 }
+};
+
+static int null_uuid(const char *uuid)
+{
+	int i;
+
+	for (i = 0; i < 16 && !uuid[i]; i++);
+
+	return (i == 16);
+}
+
+
+static void uuid_end_bio(struct bio *bio, int err)
+{
+	struct page *page = bio->bi_io_vec[0].bv_page;
+
+	if(!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		SetPageError(page);
+
+	unlock_page(page);
+	bio_put(bio);
+}
+
+
+/**
+ * read_bdev_page - submit a BIO to read one page from a block device
+ * @dev: The block device we're using.
+ * @page_num: The page we're reading.
+ *
+ * Based on Patrick Mochel's pmdisk code from long ago: "Straight from the
+ * textbook - allocate and initialize the bio. If we're writing, make sure
+ * the page is marked as dirty. Then submit it and carry on."
+ **/
+static struct page *read_bdev_page(struct block_device *dev, int page_num)
+{
+	struct bio *bio = NULL;
+	struct page *page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+
+	if (!page) {
+		printk(KERN_ERR "Failed to allocate a page for reading data "
+				"in UUID checks.");
+		return NULL;
+	}
+
+	bio = bio_alloc(GFP_NOFS, 1);
+	bio->bi_bdev = dev;
+	bio->bi_iter.bi_sector = page_num << 3;
+	bio->bi_end_io = uuid_end_bio;
+	bio->bi_flags |= (1 << BIO_TOI);
+
+	PRINTK("Submitting bio on device %lx, page %d using bio %p and page %p.\n",
+			(unsigned long) dev->bd_dev, page_num, bio, page);
+
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk(KERN_DEBUG "ERROR: adding page to bio at %d\n",
+				page_num);
+		bio_put(bio);
+		__free_page(page);
+		printk(KERN_DEBUG "read_bdev_page freed page %p (in error "
+				"path).\n", page);
+		return NULL;
+	}
+
+	lock_page(page);
+	submit_bio(READ | REQ_SYNC, bio);
+
+	wait_on_page_locked(page);
+	if (PageError(page)) {
+		__free_page(page);
+		page = NULL;
+	}
+	return page;
+}
+
+int bdev_matches_key(struct block_device *bdev, const char *key)
+{
+	unsigned char *data = NULL;
+	struct page *data_page = NULL;
+
+	int dev_offset, pg_num, pg_off, i;
+	int last_pg_num = -1;
+	int result = 0;
+	char buf[50];
+
+	if (null_uuid(key)) {
+		PRINTK("Refusing to find a NULL key.\n");
+		return 0;
+	}
+
+	if (!bdev->bd_disk) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no bd_disk.\n", buf);
+		return 0;
+	}
+
+	if (!bdev->bd_disk->queue) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no queue.\n", buf);
+		return 0;
+	}
+
+	for (i = 0; uuid_list[i].name; i++) {
+		struct uuid_info *dat = &uuid_list[i];
+
+		if (!dat->key || strcmp(dat->key, key))
+			continue;
+
+		dev_offset = (dat->bkoff << 10) + dat->sboff;
+		pg_num = dev_offset >> 12;
+		pg_off = dev_offset & 0xfff;
+
+		if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
+			continue;
+
+		if (pg_num != last_pg_num) {
+			if (data_page) {
+				kunmap(data_page);
+				__free_page(data_page);
+			}
+			data_page = read_bdev_page(bdev, pg_num);
+			if (!data_page)
+				continue;
+			data = kmap(data_page);
+		}
+
+		last_pg_num = pg_num;
+
+		if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
+			continue;
+
+		result = 1;
+		break;
+	}
+
+	if (data_page) {
+		kunmap(data_page);
+		__free_page(data_page);
+	}
+
+	return result;
+}
+
+/* 
+ * part_matches_fs_info - Does the given partition match the details given?
+ *
+ * Returns a score saying how good the match is.
+ * 0 = no UUID match.
+ * 1 = UUID but last mount time differs.
+ * 2 = UUID, last mount time but not dev_t
+ * 3 = perfect match
+ *
+ * This lets us cope elegantly with probing resulting in dev_ts changing
+ * from boot to boot, and with the case where a user copies a partition
+ * (UUID is non-unique), and we need to check the last mount time of the
+ * correct partition.
+ */
+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek)
+{
+	struct block_device *bdev;
+	struct fs_info *got;
+	int result = 0;
+	char buf[50];
+
+	if (null_uuid((char *) &seek->uuid)) {
+		PRINTK("Refusing to find a NULL uuid.\n");
+		return 0;
+	}
+
+	bdev = bdget(part_devt(part));
+
+	PRINTK("part_matches fs info considering %x.\n", part_devt(part));
+
+	if (blkdev_get(bdev, FMODE_READ, 0)) {
+		PRINTK("blkdev_get failed.\n");
+		return 0;
+	}
+
+	if (!bdev->bd_disk) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no bd_disk.\n", buf);
+		goto out;
+	}
+
+	if (!bdev->bd_disk->queue) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no queue.\n", buf);
+		goto out;
+	}
+
+	got = fs_info_from_block_dev(bdev);
+
+	if (got && !memcmp(got->uuid, seek->uuid, 16)) {
+		PRINTK(" Have matching UUID.\n");
+		PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount);
+		PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount);
+		result = 1;
+
+		if (got->last_mount_size == seek->last_mount_size &&
+		    got->last_mount && seek->last_mount &&
+		    !memcmp(got->last_mount, seek->last_mount,
+			    got->last_mount_size)) {
+			result = 2;
+
+			PRINTK(" Matching last mount time.\n");
+
+			if (part_devt(part) == seek->dev_t) {
+				result = 3;
+				PRINTK(" Matching dev_t.\n");
+			} else
+				PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t);
+		}
+	}
+
+	PRINTK(" Score for %x is %d.\n", part_devt(part), result);
+	free_fs_info(got);
+out:
+	blkdev_put(bdev, FMODE_READ);
+	return result;
+}
+
+void free_fs_info(struct fs_info *fs_info)
+{
+	if (!fs_info || IS_ERR(fs_info))
+		return;
+
+	if (fs_info->last_mount)
+		kfree(fs_info->last_mount);
+
+	kfree(fs_info);
+}
+
+struct fs_info *fs_info_from_block_dev(struct block_device *bdev)
+{
+	unsigned char *data = NULL;
+	struct page *data_page = NULL;
+
+	int dev_offset, pg_num, pg_off;
+	int uuid_pg_num, uuid_pg_off, i;
+	unsigned char *uuid_data = NULL;
+	struct page *uuid_data_page = NULL;
+
+	int last_pg_num = -1, last_uuid_pg_num = 0;
+	char buf[50];
+	struct fs_info *fs_info = NULL;
+
+	bdevname(bdev, buf);
+
+	PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf);
+
+	for (i = 0; uuid_list[i].name; i++) {
+		struct uuid_info *dat = &uuid_list[i];
+		dev_offset = (dat->bkoff << 10) + dat->sboff;
+		pg_num = dev_offset >> 12;
+		pg_off = dev_offset & 0xfff;
+		uuid_pg_num = dat->uuid_offset >> 12;
+		uuid_pg_off = dat->uuid_offset & 0xfff;
+
+		if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
+			continue;
+
+		/* Ignore partition types with no UUID offset */
+		if (!dat->uuid_offset)
+			continue;
+
+		if (pg_num != last_pg_num) {
+			if (data_page) {
+				kunmap(data_page);
+				__free_page(data_page);
+			}
+			data_page = read_bdev_page(bdev, pg_num);
+			if (!data_page)
+				continue;
+			data = kmap(data_page);
+		}
+
+		last_pg_num = pg_num;
+
+		if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
+			continue;
+
+		PRINTK("This partition looks like %s.\n", dat->name);
+
+		fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL);
+
+		if (!fs_info) {
+			PRINTK("Failed to allocate fs_info struct.");
+			fs_info = ERR_PTR(-ENOMEM);
+			break;
+		}
+
+		/* UUID can't be off the end of the disk */
+		if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) ||
+				!dat->uuid_offset)
+			goto no_uuid;
+
+		if (!uuid_data || uuid_pg_num != last_uuid_pg_num) {
+			/* No need to reread the page from above */
+			if (uuid_pg_num == pg_num && uuid_data)
+				memcpy(uuid_data, data, PAGE_SIZE);
+			else {
+				if (uuid_data_page) {
+					kunmap(uuid_data_page);
+					__free_page(uuid_data_page);
+				}
+				uuid_data_page = read_bdev_page(bdev, uuid_pg_num);
+				if (!uuid_data_page)
+					continue;
+				uuid_data = kmap(uuid_data_page);
+			}
+		}
+
+		last_uuid_pg_num = uuid_pg_num;
+		memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16);
+		fs_info->dev_t = bdev->bd_dev;
+
+no_uuid:
+		PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev "
+				"returning uuid ", DUMP_PREFIX_NONE, 16, 1,
+				fs_info->uuid, 16, 0);
+
+		if (dat->last_mount_size) {
+			int pg = dat->last_mount_offset >> 12, sz;
+			int off = dat->last_mount_offset & 0xfff;
+			struct page *last_mount = read_bdev_page(bdev, pg);
+			unsigned char *last_mount_data;
+			char *ptr;
+
+			if (!last_mount) {
+				fs_info = ERR_PTR(-ENOMEM);
+				break;
+			}
+			last_mount_data = kmap(last_mount);
+			sz = dat->last_mount_size;
+			ptr = kmalloc(sz, GFP_KERNEL);
+
+			if (!ptr) {
+				printk(KERN_EMERG "fs_info_from_block_dev "
+					"failed to get memory for last mount "
+					"timestamp.");
+				free_fs_info(fs_info);
+				fs_info = ERR_PTR(-ENOMEM);
+			} else {
+				fs_info->last_mount = ptr;
+				fs_info->last_mount_size = sz;
+				memcpy(ptr, &last_mount_data[off], sz);
+			}
+
+			kunmap(last_mount);
+			__free_page(last_mount);
+		}
+		break;
+	}
+
+	if (data_page) {
+		kunmap(data_page);
+		__free_page(data_page);
+	}
+
+	if (uuid_data_page) {
+		kunmap(uuid_data_page);
+		__free_page(uuid_data_page);
+	}
+
+	return fs_info;
+}
+
+static int __init uuid_debug_setup(char *str)
+{
+	int value;
+
+	if (sscanf(str, "=%d", &value))
+		debug_enabled = value;
+
+	return 1;
+}
+
+__setup("uuid_debug", uuid_debug_setup);
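Debug output from the helpers above is gated by the uuid_debug kernel parameter registered here (for example, booting with uuid_debug=1). A minimal sketch of how the fs_info helpers fit together, assuming the caller already holds the block device open; the probing function itself is illustrative only:

	static void example_probe_fs_info(struct block_device *bdev)
	{
		struct fs_info *info = fs_info_from_block_dev(bdev);

		if (!info || IS_ERR(info))
			return;

		/* info->uuid, info->dev_t and, when the filesystem records it,
		 * info->last_mount are now filled in from the superblock.
		 */
		free_fs_info(info);
	}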
diff -Nur linux-4.2/drivers/accessibility/braille/braille_console.c linux-4.2-pck/drivers/accessibility/braille/braille_console.c
--- linux-4.2/drivers/accessibility/braille/braille_console.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/accessibility/braille/braille_console.c	2015-09-08 00:48:22.218364355 -0300
@@ -116,7 +116,7 @@
 	*c++ = csum;
 	*c++ = ETX;
 
-	braille_co->write(braille_co, data, c - data);
+	braille_co->write(braille_co, data, c - data, 0);
 }
 
 /* Follow the VC cursor*/
diff -Nur linux-4.2/drivers/acpi/blacklist.c linux-4.2-pck/drivers/acpi/blacklist.c
--- linux-4.2/drivers/acpi/blacklist.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/acpi/blacklist.c	2015-09-08 00:48:22.218364355 -0300
@@ -317,6 +317,37 @@
 	},
 
 	/*
+	 * The following Lenovo models have a broken workaround in the
+	 * acpi_video backlight implementation to meet the Windows 8
+	 * requirement of 101 backlight levels. Reverting to pre-Win8
+	 * behavior fixes the problem.
+	 */
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Lenovo ThinkPad T430",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T430"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Lenovo ThinkPad T430s",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T430s"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Lenovo ThinkPad X230",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X230"),
+		},
+	},
+
+	/*
 	 * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
 	 * Linux ignores it, except for the machines enumerated below.
 	 */
diff -Nur linux-4.2/drivers/ata/acard-ahci.c linux-4.2-pck/drivers/ata/acard-ahci.c
--- linux-4.2/drivers/ata/acard-ahci.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/acard-ahci.c	2015-09-08 00:48:22.218364355 -0300
@@ -478,6 +478,9 @@
 		ata_port_pbar_desc(ap, AHCI_PCI_BAR,
 				   0x100 + ap->port_no * 0x80, "port");
 
+		rc = ahci_setup_port_privdata(ap);
+		if (rc)
+			return rc;
 		/* set initial link pm policy */
 		/*
 		ap->pm_policy = NOT_AVAILABLE;
diff -Nur linux-4.2/drivers/ata/ahci.c linux-4.2-pck/drivers/ata/ahci.c
--- linux-4.2/drivers/ata/ahci.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/ahci.c	2015-09-08 00:48:22.218364355 -0300
@@ -1597,6 +1597,9 @@
 		if (ap->flags & ATA_FLAG_EM)
 			ap->em_message_type = hpriv->em_msg_type;
 
+		rc = ahci_setup_port_privdata(ap);
+		if (rc)
+			return rc;
 
 		/* disabled/not-implemented port */
 		if (!(hpriv->port_map & (1 << i)))
diff -Nur linux-4.2/drivers/ata/ahci.h linux-4.2-pck/drivers/ata/ahci.h
--- linux-4.2/drivers/ata/ahci.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/ahci.h	2015-09-08 00:48:22.218364355 -0300
@@ -315,6 +315,12 @@
 	/* enclosure management info per PM slot */
 	struct ahci_em_priv	em_priv[EM_MAX_SLOTS];
 	char			*irq_desc;	/* desc in /proc/interrupts */
+	bool			init_alpe;	/* alpe enabled by default */
+	bool			init_asp;	/* asp enabled by default */
+	bool			init_devslp;	/* devslp enabled by default */
+	u32			init_dito;	/* initial dito configuration */
+	u32			init_deto;	/* initial deto configuration */
+	u32			init_mdat;	/* initial mdat configuration */
 };
 
 struct ahci_host_priv {
@@ -374,6 +380,7 @@
 extern struct ata_port_operations ahci_pmp_retry_srst_ops;
 
 unsigned int ahci_dev_classify(struct ata_port *ap);
+int ahci_setup_port_privdata(struct ata_port *ap);
 void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag,
 			u32 opts);
 void ahci_save_initial_config(struct device *dev,
diff -Nur linux-4.2/drivers/ata/libahci.c linux-4.2-pck/drivers/ata/libahci.c
--- linux-4.2/drivers/ata/libahci.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libahci.c	2015-09-08 00:48:22.218364355 -0300
@@ -684,6 +684,7 @@
 {
 	struct ata_port *ap = link->ap;
 	struct ahci_host_priv *hpriv = ap->host->private_data;
+	struct ahci_port_priv *ppriv = ap->private_data;
 	struct ahci_port_priv *pp = ap->private_data;
 	void __iomem *port_mmio = ahci_port_base(ap);
 
@@ -701,9 +702,9 @@
 
 	if (hpriv->cap & HOST_CAP_ALPM) {
 		u32 cmd = readl(port_mmio + PORT_CMD);
+		cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
 
 		if (policy == ATA_LPM_MAX_POWER || !(hints & ATA_LPM_HIPM)) {
-			cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
 			cmd |= PORT_CMD_ICC_ACTIVE;
 
 			writel(cmd, port_mmio + PORT_CMD);
@@ -711,6 +712,13 @@
 
 			/* wait 10ms to be sure we've come out of LPM state */
 			ata_msleep(ap, 10);
+		} else if (policy == ATA_LPM_FIRMWARE_DEFAULTS) {
+			if (ppriv->init_alpe)
+				cmd |= PORT_CMD_ALPE;
+			if (ppriv->init_asp)
+				cmd |= PORT_CMD_ASP;
+
+			writel(cmd, port_mmio + PORT_CMD);
 		} else {
 			cmd |= PORT_CMD_ALPE;
 			if (policy == ATA_LPM_MIN_POWER)
@@ -725,10 +733,17 @@
 	if ((hpriv->cap2 & HOST_CAP2_SDS) &&
 	    (hpriv->cap2 & HOST_CAP2_SADM) &&
 	    (link->device->flags & ATA_DFLAG_DEVSLP)) {
-		if (policy == ATA_LPM_MIN_POWER)
+		switch (policy) {
+		case ATA_LPM_MIN_POWER:
 			ahci_set_aggressive_devslp(ap, true);
-		else
+			break;
+		case ATA_LPM_FIRMWARE_DEFAULTS:
+			ahci_set_aggressive_devslp(ap, ppriv->init_devslp);
+			break;
+		default:
 			ahci_set_aggressive_devslp(ap, false);
+			break;
+		}
 	}
 
 	if (policy == ATA_LPM_MAX_POWER) {
@@ -2040,6 +2055,7 @@
 static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
 {
 	struct ahci_host_priv *hpriv = ap->host->private_data;
+	struct ahci_port_priv *ppriv = ap->private_data;
 	void __iomem *port_mmio = ahci_port_base(ap);
 	struct ata_device *dev = ap->link.device;
 	u32 devslp, dm, dito, mdat, deto;
@@ -2075,26 +2091,32 @@
 	if (rc)
 		return;
 
-	dm = (devslp & PORT_DEVSLP_DM_MASK) >> PORT_DEVSLP_DM_OFFSET;
-	dito = devslp_idle_timeout / (dm + 1);
-	if (dito > 0x3ff)
-		dito = 0x3ff;
-
-	/* Use the nominal value 10 ms if the read MDAT is zero,
-	 * the nominal value of DETO is 20 ms.
-	 */
-	if (dev->devslp_timing[ATA_LOG_DEVSLP_VALID] &
-	    ATA_LOG_DEVSLP_VALID_MASK) {
-		mdat = dev->devslp_timing[ATA_LOG_DEVSLP_MDAT] &
-		       ATA_LOG_DEVSLP_MDAT_MASK;
-		if (!mdat)
+	if (ppriv->init_devslp) {
+		dito = ppriv->init_dito;
+		deto = ppriv->init_deto;
+		mdat = ppriv->init_mdat;
+	} else {
+		dm = (devslp & PORT_DEVSLP_DM_MASK) >> PORT_DEVSLP_DM_OFFSET;
+		dito = devslp_idle_timeout / (dm + 1);
+		if (dito > 0x3ff)
+			dito = 0x3ff;
+
+		/* Use the nominal value 10 ms if the read MDAT is zero,
+		 * the nominal value of DETO is 20 ms.
+		 */
+		if (dev->devslp_timing[ATA_LOG_DEVSLP_VALID] &
+		    ATA_LOG_DEVSLP_VALID_MASK) {
+			mdat = dev->devslp_timing[ATA_LOG_DEVSLP_MDAT] &
+				ATA_LOG_DEVSLP_MDAT_MASK;
+			if (!mdat)
+				mdat = 10;
+			deto = dev->devslp_timing[ATA_LOG_DEVSLP_DETO];
+			if (!deto)
+				deto = 20;
+		} else {
 			mdat = 10;
-		deto = dev->devslp_timing[ATA_LOG_DEVSLP_DETO];
-		if (!deto)
 			deto = 20;
-	} else {
-		mdat = 10;
-		deto = 20;
+		}
 	}
 
 	devslp |= ((dito << PORT_DEVSLP_DITO_OFFSET) |
@@ -2257,19 +2279,53 @@
 }
 #endif
 
+/*
+ * Allocate port privdata and read back initial power management configuration
+ */
+int ahci_setup_port_privdata(struct ata_port *ap)
+{
+	struct ahci_port_priv *pp;
+	u32 cmd, devslp;
+	void __iomem *port_mmio = ahci_port_base(ap);
+
+	pp = kzalloc(sizeof(*pp), GFP_KERNEL);
+	if (!pp)
+		return -ENOMEM;
+
+	ap->private_data = pp;
+
+	cmd = readl(port_mmio + PORT_CMD);
+
+	if (cmd & PORT_CMD_ALPE)
+		pp->init_alpe = true;
+
+	if (cmd & PORT_CMD_ASP)
+		pp->init_asp = true;
+
+	devslp = readl(port_mmio + PORT_DEVSLP);
+
+	/* devslp unsupported or disabled */
+	if (!(devslp & PORT_DEVSLP_DSP) || !(devslp & PORT_DEVSLP_ADSE))
+		return 0;
+
+	pp->init_devslp = true;
+	pp->init_dito = (devslp >> PORT_DEVSLP_DITO_OFFSET) & 0x3ff;
+	pp->init_deto = (devslp >> PORT_DEVSLP_DETO_OFFSET) & 0xff;
+	pp->init_mdat = (devslp >> PORT_DEVSLP_MDAT_OFFSET) & 0x1f;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ahci_setup_port_privdata);
+
 static int ahci_port_start(struct ata_port *ap)
 {
 	struct ahci_host_priv *hpriv = ap->host->private_data;
+	struct ahci_port_priv *pp = ap->private_data;
 	struct device *dev = ap->host->dev;
-	struct ahci_port_priv *pp;
 	void *mem;
 	dma_addr_t mem_dma;
 	size_t dma_sz, rx_fis_sz;
 
-	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
-	if (!pp)
-		return -ENOMEM;
-
 	if (ap->host->n_ports > 1) {
 		pp->irq_desc = devm_kzalloc(dev, 8, GFP_KERNEL);
 		if (!pp->irq_desc) {
@@ -2348,8 +2404,6 @@
 		ap->lock = &pp->lock;
 	}
 
-	ap->private_data = pp;
-
 	/* engage engines, captain */
 	return ahci_port_resume(ap);
 }
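With this change ahci_port_start() no longer allocates the per-port private data itself; it expects ap->private_data to have been set up earlier. That is why each AHCI host driver touched by this patch (ahci, acard-ahci, libahci_platform, sata_highbank) gains an ahci_setup_port_privdata() call during host initialization, so the firmware-programmed ALPE/ASP/DevSlp settings are captured before the port is started and can later be restored by the firmware_defaults LPM policy.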
diff -Nur linux-4.2/drivers/ata/libahci_platform.c linux-4.2-pck/drivers/ata/libahci_platform.c
--- linux-4.2/drivers/ata/libahci_platform.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libahci_platform.c	2015-09-08 00:48:22.218364355 -0300
@@ -565,6 +565,10 @@
 		if (ap->flags & ATA_FLAG_EM)
 			ap->em_message_type = hpriv->em_msg_type;
 
+		rc = ahci_setup_port_privdata(ap);
+		if (rc)
+			return rc;
+
 		/* disabled/not-implemented port */
 		if (!(hpriv->port_map & (1 << i)))
 			ap->ops = &ata_dummy_port_ops;
diff -Nur linux-4.2/drivers/ata/libata-core.c linux-4.2-pck/drivers/ata/libata-core.c
--- linux-4.2/drivers/ata/libata-core.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libata-core.c	2015-09-08 00:48:22.221697530 -0300
@@ -2024,6 +2024,9 @@
 		}
 	}
 
+	if (id[79] & (1 << SATA_DIPM))
+		dev->init_dipm = true;
+
 	*p_class = class;
 
 	return 0;
@@ -3655,6 +3658,11 @@
 		return rc;
 
 	switch (policy) {
+	case ATA_LPM_FIRMWARE_DEFAULTS:
+		/* use the values we read at probe */
+		scontrol &= ~(0x7 << 8);
+		scontrol |= (link->init_lpm << 8);
+		break;
 	case ATA_LPM_MAX_POWER:
 		/* disable all LPM transitions */
 		scontrol |= (0x7 << 8);
@@ -5582,11 +5590,11 @@
 }
 
 /**
- *	sata_link_init_spd - Initialize link->sata_spd_limit
- *	@link: Link to configure sata_spd_limit for
+ *	sata_link_init_config - Initialize link->sata_spd_limit and init_lpm
+ *	@link: Link to configure sata_spd_limit and init_lpm for
  *
- *	Initialize @link->[hw_]sata_spd_limit to the currently
- *	configured value.
+ *	Initialize @link->[hw_]sata_spd_limit and @link->init_lpm to the
+ *	currently configured value.
  *
  *	LOCKING:
  *	Kernel thread context (may sleep).
@@ -5594,7 +5602,7 @@
  *	RETURNS:
  *	0 on success, -errno on failure.
  */
-int sata_link_init_spd(struct ata_link *link)
+int sata_link_init_config(struct ata_link *link)
 {
 	u8 spd;
 	int rc;
@@ -5611,6 +5619,8 @@
 
 	link->sata_spd_limit = link->hw_sata_spd_limit;
 
+	link->init_lpm = (link->saved_scontrol >> 8) & 0x7;
+
 	return 0;
 }
 
@@ -6160,9 +6170,9 @@
 			ap->cbl = ATA_CBL_SATA;
 
 		/* init sata_spd_limit to the current value */
-		sata_link_init_spd(&ap->link);
+		sata_link_init_config(&ap->link);
 		if (ap->slave_link)
-			sata_link_init_spd(ap->slave_link);
+			sata_link_init_config(ap->slave_link);
 
 		/* print per-port info to dmesg */
 		xfer_mask = ata_pack_xfermask(ap->pio_mask, ap->mwdma_mask,
diff -Nur linux-4.2/drivers/ata/libata-eh.c linux-4.2-pck/drivers/ata/libata-eh.c
--- linux-4.2/drivers/ata/libata-eh.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libata-eh.c	2015-09-08 00:48:22.221697530 -0300
@@ -3429,9 +3429,9 @@
 		return 0;
 
 	/*
-	 * DIPM is enabled only for MIN_POWER as some devices
-	 * misbehave when the host NACKs transition to SLUMBER.  Order
-	 * device and link configurations such that the host always
+	 * DIPM is enabled only for MIN_POWER and FIRMWARE_DEFAULTS as some
+	 * devices misbehave when the host NACKs transition to SLUMBER.
+	 * Order device and link configurations such that the host always
 	 * allows DIPM requests.
 	 */
 	ata_for_each_dev(dev, link, ENABLED) {
@@ -3491,10 +3491,13 @@
 	if (ap && ap->slave_link)
 		ap->slave_link->lpm_policy = policy;
 
-	/* host config updated, enable DIPM if transitioning to MIN_POWER */
+	/* host config updated, enable DIPM if transitioning to MIN_POWER or
+	 * FIRMWARE_DEFAULTS when enabled by firmware
+	 */
 	ata_for_each_dev(dev, link, ENABLED) {
-		if (policy == ATA_LPM_MIN_POWER && !no_dipm &&
-		    ata_id_has_dipm(dev->id)) {
+		if ((policy == ATA_LPM_MIN_POWER && !no_dipm &&
+		     ata_id_has_dipm(dev->id)) ||
+		    (policy == ATA_LPM_FIRMWARE_DEFAULTS && dev->init_dipm)) {
 			err_mask = ata_dev_set_feature(dev,
 					SETFEATURES_SATA_ENABLE, SATA_DIPM);
 			if (err_mask && err_mask != AC_ERR_DEV) {
diff -Nur linux-4.2/drivers/ata/libata-pmp.c linux-4.2-pck/drivers/ata/libata-pmp.c
--- linux-4.2/drivers/ata/libata-pmp.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libata-pmp.c	2015-09-08 00:48:22.221697530 -0300
@@ -538,7 +538,7 @@
 		ap->ops->pmp_attach(ap);
 
 	ata_for_each_link(tlink, ap, EDGE)
-		sata_link_init_spd(tlink);
+		sata_link_init_config(tlink);
 
 	return 0;
 
diff -Nur linux-4.2/drivers/ata/libata-scsi.c linux-4.2-pck/drivers/ata/libata-scsi.c
--- linux-4.2/drivers/ata/libata-scsi.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libata-scsi.c	2015-09-08 00:48:22.221697530 -0300
@@ -107,6 +107,7 @@
 static const char *ata_lpm_policy_names[] = {
 	[ATA_LPM_UNKNOWN]	= "max_performance",
 	[ATA_LPM_MAX_POWER]	= "max_performance",
+	[ATA_LPM_FIRMWARE_DEFAULTS] = "firmware_defaults",
 	[ATA_LPM_MED_POWER]	= "medium_power",
 	[ATA_LPM_MIN_POWER]	= "min_power",
 };
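These strings are the values accepted by libata's per-host link_power_management_policy sysfs attribute, so once this entry exists the new policy can be selected at runtime by writing firmware_defaults to /sys/class/scsi_host/hostX/link_power_management_policy (hostX being whichever host the port belongs to).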
diff -Nur linux-4.2/drivers/ata/libata.h linux-4.2-pck/drivers/ata/libata.h
--- linux-4.2/drivers/ata/libata.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/libata.h	2015-09-08 00:48:22.221697530 -0300
@@ -98,7 +98,7 @@
 extern bool ata_phys_link_offline(struct ata_link *link);
 extern void ata_dev_init(struct ata_device *dev);
 extern void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp);
-extern int sata_link_init_spd(struct ata_link *link);
+extern int sata_link_init_config(struct ata_link *link);
 extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern struct ata_port *ata_port_alloc(struct ata_host *host);
diff -Nur linux-4.2/drivers/ata/sata_highbank.c linux-4.2-pck/drivers/ata/sata_highbank.c
--- linux-4.2/drivers/ata/sata_highbank.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/ata/sata_highbank.c	2015-09-08 00:48:22.221697530 -0300
@@ -557,6 +557,10 @@
 		if (ap->flags & ATA_FLAG_EM)
 			ap->em_message_type = hpriv->em_msg_type;
 
+		rc = ahci_setup_port_privdata(ap);
+		if (rc)
+			goto err0;
+
 		/* disabled/not-implemented port */
 		if (!(hpriv->port_map & (1 << i)))
 			ap->ops = &ata_dummy_port_ops;
diff -Nur linux-4.2/drivers/cpufreq/cpufreq_ondemand.c linux-4.2-pck/drivers/cpufreq/cpufreq_ondemand.c
--- linux-4.2/drivers/cpufreq/cpufreq_ondemand.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/cpufreq/cpufreq_ondemand.c	2015-09-08 00:48:22.221697530 -0300
@@ -20,7 +20,11 @@
 
 /* On-demand governor macros */
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
+#ifdef CONFIG_PCK_INTERACTIVE
+#define DEF_SAMPLING_DOWN_FACTOR		(10)
+#else
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
+#endif
 #define MAX_SAMPLING_DOWN_FACTOR		(100000)
 #define MICRO_FREQUENCY_UP_THRESHOLD		(95)
 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
diff -Nur linux-4.2/drivers/gpu/drm/drm_gem.c linux-4.2-pck/drivers/gpu/drm/drm_gem.c
--- linux-4.2/drivers/gpu/drm/drm_gem.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/gpu/drm/drm_gem.c	2015-09-08 11:20:32.040366141 -0300
@@ -137,7 +137,7 @@
 
 	drm_gem_private_object_init(dev, obj, size);
 
-	filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
+	filp = shmem_file_setup("drm mm object", size, VM_NORESERVE, 1);
 	if (IS_ERR(filp))
 		return PTR_ERR(filp);
 
diff -Nur linux-4.2/drivers/gpu/drm/radeon/r600.c linux-4.2-pck/drivers/gpu/drm/radeon/r600.c
--- linux-4.2/drivers/gpu/drm/radeon/r600.c	2015-08-30 16:21:13.000000000 -0300
+++ linux-4.2-pck/drivers/gpu/drm/radeon/r600.c	2015-09-08 00:49:30.085144720 -0300
@@ -2489,7 +2489,7 @@
 	}
 
 	DRM_INFO("Loading %s Microcode\n", chip_name);
-
+#if 0
 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
 	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
 	if (err)
@@ -2541,7 +2541,7 @@
 			err = -EINVAL;
 		}
 	}
-
+#endif
 out:
 	if (err) {
 		if (err != -EINVAL)
@@ -3201,7 +3201,7 @@
 	r = radeon_bo_init(rdev);
 	if (r)
 		return r;
-
+#if 0
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
 		r = r600_init_microcode(rdev);
 		if (r) {
@@ -3209,7 +3209,7 @@
 			return r;
 		}
 	}
-
+#endif
 	/* Initialize power management */
 	radeon_pm_init(rdev);
 
diff -Nur linux-4.2/drivers/gpu/drm/radeon/r600_cp.c linux-4.2-pck/drivers/gpu/drm/radeon/r600_cp.c
--- linux-4.2/drivers/gpu/drm/radeon/r600_cp.c	2015-08-30 16:21:17.000000000 -0300
+++ linux-4.2-pck/drivers/gpu/drm/radeon/r600_cp.c	2015-09-08 00:49:30.085144720 -0300
@@ -2241,7 +2241,7 @@
 		else
 			r600_vm_init(dev);
 	}
-
+#if 0
 	if (!dev_priv->me_fw || !dev_priv->pfp_fw) {
 		int err = r600_cp_init_microcode(dev_priv);
 		if (err) {
@@ -2250,6 +2250,9 @@
 			return err;
 		}
 	}
+#else
+	printk("Skipping firmware loading\n");
+#endif
 	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
 		r700_cp_load_microcode(dev_priv);
 	else
diff -Nur linux-4.2/drivers/gpu/drm/ttm/ttm_tt.c linux-4.2-pck/drivers/gpu/drm/ttm/ttm_tt.c
--- linux-4.2/drivers/gpu/drm/ttm/ttm_tt.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/gpu/drm/ttm/ttm_tt.c	2015-09-08 11:20:32.080364185 -0300
@@ -339,7 +339,7 @@
 	if (!persistent_swap_storage) {
 		swap_storage = shmem_file_setup("ttm swap",
 						ttm->num_pages << PAGE_SHIFT,
-						0);
+						0, 0);
 		if (unlikely(IS_ERR(swap_storage))) {
 			pr_err("Failed allocating swap storage\n");
 			return PTR_ERR(swap_storage);
diff -Nur linux-4.2/drivers/input/joydev.c linux-4.2-pck/drivers/input/joydev.c
--- linux-4.2/drivers/input/joydev.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/input/joydev.c	2015-09-08 00:48:22.221697530 -0300
@@ -28,15 +28,21 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/cdev.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION("Joystick device interfaces");
 MODULE_SUPPORTED_DEVICE("input/js");
 MODULE_LICENSE("GPL");
-
 #define JOYDEV_MINOR_BASE	0
 #define JOYDEV_MINORS		16
 #define JOYDEV_BUFFER_SIZE	64
+#define MAX_REMAP_SIZE          10
+
+static int remap_array[MAX_REMAP_SIZE];
+static int remap_count = 0;
+static int free_buttons[MAX_REMAP_SIZE];
 
 struct joydev {
 	int open;
@@ -71,6 +77,9 @@
 	struct list_head node;
 };
 
+module_param_array(remap_array, int, &remap_count, 0);
+MODULE_PARM_DESC(remap_array, "remap axes to buttons");
+
 static int joydev_correct(int value, struct js_corr *corr)
 {
 	switch (corr->type) {
@@ -121,6 +130,17 @@
 	struct joydev *joydev = handle->private;
 	struct joydev_client *client;
 	struct js_event event;
+	int i;
+
+	if (remap_count > 0 && remap_count < MAX_REMAP_SIZE) {
+		for (i = 0; i < remap_count; i++)
+			if (code == remap_array[i]) {
+				type = EV_KEY;
+				code = free_buttons[i];
+				if (value == 255)
+					value = 1;
+			}
+	}
 
 	switch (type) {
 
@@ -826,7 +846,7 @@
 			  const struct input_device_id *id)
 {
 	struct joydev *joydev;
-	int i, j, t, minor, dev_no;
+	int i, j = 0, t, minor, dev_no;
 	int error;
 
 	minor = input_get_new_minor(JOYDEV_MINOR_BASE, JOYDEV_MINORS, true);
@@ -871,15 +891,24 @@
 			joydev->keymap[i] = joydev->nkey;
 			joydev->keypam[joydev->nkey] = i + BTN_MISC;
 			joydev->nkey++;
-		}
+			j = i;
+		}
+	if (remap_count > 0 && remap_count < MAX_REMAP_SIZE) {
+		printk("[joydev] axis remapping enabled\n");
+		for (i = 0; i < remap_count; i++) {
+			joydev->keymap[j + i + 1] = joydev->nkey;
+			joydev->keypam[joydev->nkey] = i + j + 1 + BTN_MISC;
+			free_buttons[i] = j + i + 1 + BTN_MISC;
+			joydev->nkey++;
 
+		}
 	for (i = 0; i < BTN_JOYSTICK - BTN_MISC; i++)
 		if (test_bit(i + BTN_MISC, dev->keybit)) {
 			joydev->keymap[i] = joydev->nkey;
 			joydev->keypam[joydev->nkey] = i + BTN_MISC;
 			joydev->nkey++;
 		}
-
+	}
 	for (i = 0; i < joydev->nabs; i++) {
 		j = joydev->abspam[i];
 		if (input_abs_get_max(dev, j) == input_abs_get_min(dev, j)) {
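A usage note for the remap parameter introduced above: module_param_array() exposes it as remap_array, so loading the driver with, for example, remap_array=2,5 (or joydev.remap_array=2,5 on the kernel command line when joydev is built in) makes events whose code matches one of the listed values be reported as extra buttons, with a value of 255 translated to a pressed state. The example codes are illustrative only.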
diff -Nur linux-4.2/drivers/input/mouse/synaptics.c linux-4.2-pck/drivers/input/mouse/synaptics.c
--- linux-4.2/drivers/input/mouse/synaptics.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/input/mouse/synaptics.c	2015-09-08 00:48:22.221697530 -0300
@@ -1250,7 +1250,9 @@
 		/* Clickpads report only left button */
 		__clear_bit(BTN_RIGHT, dev->keybit);
 		__clear_bit(BTN_MIDDLE, dev->keybit);
-	}
+	} else if (SYN_CAP_CLICKPAD2BTN(priv->ext_cap_0c) ||
+		   SYN_CAP_CLICKPAD2BTN2(priv->ext_cap_0c))
+		__set_bit(INPUT_PROP_BUTTONPAD, dev->propbit);
 }
 
 static ssize_t synaptics_show_disable_gesture(struct psmouse *psmouse,
diff -Nur linux-4.2/drivers/input/mouse/synaptics.h linux-4.2-pck/drivers/input/mouse/synaptics.h
--- linux-4.2/drivers/input/mouse/synaptics.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/input/mouse/synaptics.h	2015-09-08 00:48:22.221697530 -0300
@@ -85,6 +85,7 @@
  */
 #define SYN_CAP_CLICKPAD(ex0c)		((ex0c) & 0x100000) /* 1-button ClickPad */
 #define SYN_CAP_CLICKPAD2BTN(ex0c)	((ex0c) & 0x000100) /* 2-button ClickPad */
+#define SYN_CAP_CLICKPAD2BTN2(ex0c)	((ex0c) & 0x200000) /* 2-button ClickPad */
 #define SYN_CAP_MAX_DIMENSIONS(ex0c)	((ex0c) & 0x020000)
 #define SYN_CAP_MIN_DIMENSIONS(ex0c)	((ex0c) & 0x002000)
 #define SYN_CAP_ADV_GESTURE(ex0c)	((ex0c) & 0x080000)
diff -Nur linux-4.2/drivers/macintosh/Kconfig linux-4.2-pck/drivers/macintosh/Kconfig
--- linux-4.2/drivers/macintosh/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/macintosh/Kconfig	2015-09-08 00:48:22.221697530 -0300
@@ -172,6 +172,13 @@
 
 	  If unsure, say Y.
 
+config ADB_TRACKPAD_ABSOLUTE
+	bool "Enable absolute mode for adb trackpads"
+	depends on INPUT_ADBHID
+	help
+	  Enable absolute mode on ADB-based trackpads. This feature adds
+	  compatibility with the Synaptics Xorg / XFree86 drivers.
+
 config MAC_EMUMOUSEBTN
 	tristate "Support for mouse button 2+3 emulation"
 	depends on SYSCTL && INPUT
diff -Nur linux-4.2/drivers/macintosh/adbhid.c linux-4.2-pck/drivers/macintosh/adbhid.c
--- linux-4.2/drivers/macintosh/adbhid.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/macintosh/adbhid.c	2015-09-08 00:48:22.221697530 -0300
@@ -261,6 +261,15 @@
 #define ADBMOUSE_MS_A3		8	/* Mouse systems A3 trackball (handler 3) */
 #define ADBMOUSE_MACALLY2	9	/* MacAlly 2-button mouse */
 
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+#define	ABS_XMIN	310
+#define	ABS_XMAX	1700
+#define	ABS_YMIN	200
+#define	ABS_YMAX	1000
+#define	ABS_ZMIN	0
+#define	ABS_ZMAX	55
+#endif
+
 static void
 adbhid_keyboard_input(unsigned char *data, int nb, int apoll)
 {
@@ -405,6 +414,9 @@
 adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
 {
 	int id = (data[0] >> 4) & 0x0f;
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	int btn = 0; int x_axis = 0; int y_axis = 0; int z_axis = 0;
+#endif
 
 	if (!adbhid[id]) {
 		printk(KERN_ERR "ADB HID on ID %d not yet registered\n", id);
@@ -436,6 +448,17 @@
 	      high bits of y-axis motion.  XY is additional
 	      high bits of x-axis motion.
 
+    For ADB Absolute motion protocol the data array will contain the
+    following values:
+
+		BITS    COMMENTS
+    data[0] = dddd 1100 ADB command: Talk, register 0, for device dddd.
+    data[1] = byyy yyyy Left button and y-axis motion.
+    data[2] = bxxx xxxx Second button and x-axis motion.
+    data[3] = 1yyy 1xxx Half bits of y-axis and x-axis motion.
+    data[4] = 1yyy 1xxx Higher bits of y-axis and x-axis motion.
+    data[5] = 1zzz 1zzz Higher and lower bits of z-pressure.
+
     MacAlly 2-button mouse protocol.
 
     For MacAlly 2-button mouse protocol the data array will contain the
@@ -458,8 +481,17 @@
 	switch (adbhid[id]->mouse_kind)
 	{
 	    case ADBMOUSE_TRACKPAD:
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+		x_axis = (data[2] & 0x7f) | ((data[3] & 0x07) << 7) |
+			((data[4] & 0x07) << 10);
+		y_axis = (data[1] & 0x7f) | ((data[3] & 0x70) << 3) |
+			((data[4] & 0x70) << 6);
+		z_axis = (data[5] & 0x07) | ((data[5] & 0x70) >> 1);
+		btn = (!(data[1] >> 7)) & 1;
+#else
 		data[1] = (data[1] & 0x7f) | ((data[1] & data[2]) & 0x80);
 		data[2] = data[2] | 0x80;
+#endif
 		break;
 	    case ADBMOUSE_MICROSPEED:
 		data[1] = (data[1] & 0x7f) | ((data[3] & 0x01) << 7);
@@ -485,17 +517,39 @@
                 break;
 	}
 
-	input_report_key(adbhid[id]->input, BTN_LEFT,   !((data[1] >> 7) & 1));
-	input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1));
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	if ( adbhid[id]->mouse_kind == ADBMOUSE_TRACKPAD ) {
 
-	if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD)
-		input_report_key(adbhid[id]->input, BTN_RIGHT,  !((data[3] >> 7) & 1));
+		if(z_axis > 30) input_report_key(adbhid[id]->input, BTN_TOUCH, 1);
+		if(z_axis < 25) input_report_key(adbhid[id]->input, BTN_TOUCH, 0);
 
-	input_report_rel(adbhid[id]->input, REL_X,
-			 ((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 ));
-	input_report_rel(adbhid[id]->input, REL_Y,
-			 ((data[1]&0x7f) < 64 ? (data[1]&0x7f) : (data[1]&0x7f)-128 ));
+		if(z_axis > 0){
+			input_report_abs(adbhid[id]->input, ABS_X, x_axis);
+			input_report_abs(adbhid[id]->input, ABS_Y, y_axis);
+			input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 1);
+			input_report_abs(adbhid[id]->input, ABS_TOOL_WIDTH, 5);
+		} else {
+			input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 0);
+			input_report_abs(adbhid[id]->input, ABS_TOOL_WIDTH, 0);
+		}
+
+		input_report_abs(adbhid[id]->input, ABS_PRESSURE, z_axis);
+		input_report_key(adbhid[id]->input, BTN_LEFT, btn);
+	} else {
+#endif
+		input_report_key(adbhid[id]->input, BTN_LEFT,   !((data[1] >> 7) & 1));
+		input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1));
+
+		if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD)
+			input_report_key(adbhid[id]->input, BTN_RIGHT,  !((data[3] >> 7) & 1));
 
+		input_report_rel(adbhid[id]->input, REL_X,
+				((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 ));
+		input_report_rel(adbhid[id]->input, REL_Y,
+				((data[1]&0x7f) < 64 ? (data[1]&0x7f) : (data[1]&0x7f)-128 ));
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	}
+#endif
 	input_sync(adbhid[id]->input);
 }
 
@@ -849,6 +903,15 @@
 		input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
 			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 		input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+		set_bit(EV_ABS, input_dev->evbit);
+		input_set_abs_params(input_dev, ABS_X, ABS_XMIN, ABS_XMAX, 0, 0);
+		input_set_abs_params(input_dev, ABS_Y, ABS_YMIN, ABS_YMAX, 0, 0);
+		input_set_abs_params(input_dev, ABS_PRESSURE, ABS_ZMIN, ABS_ZMAX, 0, 0);
+		set_bit(BTN_TOUCH, input_dev->keybit);
+		set_bit(BTN_TOOL_FINGER, input_dev->keybit);
+		set_bit(ABS_TOOL_WIDTH, input_dev->absbit);
+#endif
 		break;
 
 	case ADB_MISC:
@@ -1132,7 +1195,11 @@
 	            r1_buffer[3],
 	            r1_buffer[4],
 	            r1_buffer[5],
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+		    0x00, /* Enable absolute mode */
+#else
 	            0x03, /*r1_buffer[6],*/
+#endif
 	            r1_buffer[7]);
 
 	    /* Without this flush, the trackpad may be locked up */
diff -Nur linux-4.2/drivers/net/ethernet/intel/e1000e/netdev.c linux-4.2-pck/drivers/net/ethernet/intel/e1000e/netdev.c
--- linux-4.2/drivers/net/ethernet/intel/e1000e/netdev.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/net/ethernet/intel/e1000e/netdev.c	2015-09-08 00:48:22.225030705 -0300
@@ -4280,18 +4280,29 @@
 	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
 						     cc);
 	struct e1000_hw *hw = &adapter->hw;
+	u32 systimel_1, systimel_2, systimeh;
 	cycle_t systim, systim_next;
-	/* SYSTIMH latching upon SYSTIML read does not work well. To fix that
-	 * we don't want to allow overflow of SYSTIML and a change to SYSTIMH
-	 * to occur between reads, so if we read a vale close to overflow, we
-	 * wait for overflow to occur and read both registers when its safe.
+	/* SYSTIMH latching upon SYSTIML read does not work well.
+	 * This means that if SYSTIML overflows after we read it but before
+	 * we read SYSTIMH, the value of SYSTIMH has been incremented and we
+	 * will experience a huge non-linear increment in the systime value.
+	 * To fix that we test for overflow and, if true, re-read systime.
 	 */
-	u32 systim_overflow_latch_fix = 0x3FFFFFFF;
-
-	do {
-		systim = (cycle_t)er32(SYSTIML);
-	} while (systim > systim_overflow_latch_fix);
-	systim |= (cycle_t)er32(SYSTIMH) << 32;
+	systimel_1 = er32(SYSTIML);
+	systimeh = er32(SYSTIMH);
+	systimel_2 = er32(SYSTIML);
+	/* Check for overflow. If there was no overflow, use the values */
+	if (systimel_1 < systimel_2) {
+		systim = (cycle_t)systimel_1;
+		systim |= (cycle_t)systimeh << 32;
+	} else {
+		/* There was an overflow, read again SYSTIMH, and use
+		 * systimel_2
+		 */
+		systimeh = er32(SYSTIMH);
+		systim = (cycle_t)systimel_2;
+		systim |= (cycle_t)systimeh << 32;
+	}
 
 	if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
 		u64 incvalue, time_delta, rem, temp;
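The comment above describes a general pattern for sampling a 64-bit counter exposed as two 32-bit registers that are not latched atomically. A minimal sketch of that pattern, with hypothetical read_lo()/read_hi() accessors standing in for the SYSTIML/SYSTIMH reads:

	static u64 read_split_counter(void)
	{
		u32 lo1 = read_lo();
		u32 hi  = read_hi();
		u32 lo2 = read_lo();

		if (lo1 < lo2)	/* the low word did not wrap between reads */
			return ((u64)hi << 32) | lo1;

		/* the low word wrapped, so hi may be stale; re-read it */
		hi = read_hi();
		return ((u64)hi << 32) | lo2;
	}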
diff -Nur linux-4.2/drivers/net/netconsole.c linux-4.2-pck/drivers/net/netconsole.c
--- linux-4.2/drivers/net/netconsole.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/net/netconsole.c	2015-09-08 00:48:22.225030705 -0300
@@ -836,7 +836,7 @@
 }
 
 static void write_ext_msg(struct console *con, const char *msg,
-			  unsigned int len)
+			  unsigned int len, unsigned int loglevel)
 {
 	struct netconsole_target *nt;
 	unsigned long flags;
@@ -851,7 +851,8 @@
 	spin_unlock_irqrestore(&target_list_lock, flags);
 }
 
-static void write_msg(struct console *con, const char *msg, unsigned int len)
+static void write_msg(struct console *con, const char *msg, unsigned int len,
+                      unsigned int loglevel)
 {
 	int frag, left;
 	unsigned long flags;
diff -Nur linux-4.2/drivers/platform/x86/Kconfig linux-4.2-pck/drivers/platform/x86/Kconfig
--- linux-4.2/drivers/platform/x86/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/platform/x86/Kconfig	2015-09-08 00:48:22.225030705 -0300
@@ -489,9 +489,30 @@
 	  If you are not sure, say Y here.  The driver enables polling only if
 	  it is strictly necessary to do so.
 
+config THINKPAD_EC
+	tristate
+	depends on X86
+	---help---
+	  This is a low-level driver for accessing the ThinkPad H8S embedded
+	  controller over the LPC bus (not to be confused with the ACPI Embedded
+	  Controller interface).
+
+config TP_SMAPI
+	tristate "ThinkPad SMAPI Support"
+	depends on X86
+	select THINKPAD_EC
+	default n
+	help
+	  This adds SMAPI support on Lenovo/IBM ThinkPads, for features such
+	  as battery charging control. For more information about this driver
+	  see <http://www.thinkwiki.org/wiki/tp_smapi>.
+
+	  If you have a Lenovo/IBM ThinkPad laptop, say Y or M here.
+
 config SENSORS_HDAPS
 	tristate "Thinkpad Hard Drive Active Protection System (hdaps)"
 	depends on INPUT && X86
+	select THINKPAD_EC
 	select INPUT_POLLDEV
 	default n
 	help
diff -Nur linux-4.2/drivers/platform/x86/Makefile linux-4.2-pck/drivers/platform/x86/Makefile
--- linux-4.2/drivers/platform/x86/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/platform/x86/Makefile	2015-09-08 00:48:22.225030705 -0300
@@ -25,6 +25,8 @@
 obj-$(CONFIG_SONY_LAPTOP)	+= sony-laptop.o
 obj-$(CONFIG_IDEAPAD_LAPTOP)	+= ideapad-laptop.o
 obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
+obj-$(CONFIG_THINKPAD_EC)	+= thinkpad_ec.o
+obj-$(CONFIG_TP_SMAPI)		+= tp_smapi.o
 obj-$(CONFIG_SENSORS_HDAPS)	+= hdaps.o
 obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_FUJITSU_TABLET)	+= fujitsu-tablet.o
diff -Nur linux-4.2/drivers/platform/x86/hdaps.c linux-4.2-pck/drivers/platform/x86/hdaps.c
--- linux-4.2/drivers/platform/x86/hdaps.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/platform/x86/hdaps.c	2015-09-08 00:48:22.225030705 -0300
@@ -30,266 +30,384 @@
 
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/input-polldev.h>
+#include <linux/input.h>
 #include <linux/kernel.h>
-#include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/dmi.h>
 #include <linux/jiffies.h>
-#include <linux/io.h>
-
-#define HDAPS_LOW_PORT		0x1600	/* first port used by hdaps */
-#define HDAPS_NR_PORTS		0x30	/* number of ports: 0x1600 - 0x162f */
-
-#define HDAPS_PORT_STATE	0x1611	/* device state */
-#define HDAPS_PORT_YPOS		0x1612	/* y-axis position */
-#define	HDAPS_PORT_XPOS		0x1614	/* x-axis position */
-#define HDAPS_PORT_TEMP1	0x1616	/* device temperature, in Celsius */
-#define HDAPS_PORT_YVAR		0x1617	/* y-axis variance (what is this?) */
-#define HDAPS_PORT_XVAR		0x1619	/* x-axis variance (what is this?) */
-#define HDAPS_PORT_TEMP2	0x161b	/* device temperature (again?) */
-#define HDAPS_PORT_UNKNOWN	0x161c	/* what is this? */
-#define HDAPS_PORT_KMACT	0x161d	/* keyboard or mouse activity */
-
-#define STATE_FRESH		0x50	/* accelerometer data is fresh */
+#include <linux/thinkpad_ec.h>
+#include <linux/pci_ids.h>
+#include <linux/version.h>
+
+/* Embedded controller accelerometer read command and its result: */
+static const struct thinkpad_ec_row ec_accel_args =
+	{ .mask = 0x0001, .val = {0x11} };
+#define EC_ACCEL_IDX_READOUTS	0x1	/* readouts included in this read */
+					/* First readout, if READOUTS>=1: */
+#define EC_ACCEL_IDX_YPOS1	0x2	/*   y-axis position word */
+#define EC_ACCEL_IDX_XPOS1	0x4	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP1	0x6	/*   device temperature in Celsius */
+					/* Second readout, if READOUTS>=2: */
+#define EC_ACCEL_IDX_XPOS2	0x7	/*   y-axis position word */
+#define EC_ACCEL_IDX_YPOS2	0x9	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP2	0xb	/*   device temperature in Celsius */
+#define EC_ACCEL_IDX_QUEUED	0xc	/* Number of queued readouts left */
+#define EC_ACCEL_IDX_KMACT	0xd	/* keyboard or mouse activity */
+#define EC_ACCEL_IDX_RETVAL	0xf	/* command return value, good=0x00 */
 
 #define KEYBD_MASK		0x20	/* set if keyboard activity */
 #define MOUSE_MASK		0x40	/* set if mouse activity */
-#define KEYBD_ISSET(n)		(!! (n & KEYBD_MASK))	/* keyboard used? */
-#define MOUSE_ISSET(n)		(!! (n & MOUSE_MASK))	/* mouse used? */
 
-#define INIT_TIMEOUT_MSECS	4000	/* wait up to 4s for device init ... */
-#define INIT_WAIT_MSECS		200	/* ... in 200ms increments */
+#define READ_TIMEOUT_MSECS	100	/* wait this long for device read */
+#define RETRY_MSECS		3	/* retry delay */
 
-#define HDAPS_POLL_INTERVAL	50	/* poll for input every 1/20s (50 ms)*/
 #define HDAPS_INPUT_FUZZ	4	/* input event threshold */
 #define HDAPS_INPUT_FLAT	4
+#define KMACT_REMEMBER_PERIOD   (HZ/10) /* keyboard/mouse persistence */
 
-#define HDAPS_X_AXIS		(1 << 0)
-#define HDAPS_Y_AXIS		(1 << 1)
-#define HDAPS_BOTH_AXES		(HDAPS_X_AXIS | HDAPS_Y_AXIS)
+/* Input IDs */
+#define HDAPS_INPUT_VENDOR	PCI_VENDOR_ID_IBM
+#define HDAPS_INPUT_PRODUCT	0x5054 /* "TP", shared with thinkpad_acpi */
+#define HDAPS_INPUT_JS_VERSION	0x6801 /* Joystick emulation input device */
+#define HDAPS_INPUT_RAW_VERSION	0x4801 /* Raw accelerometer input device */
+
+/* Axis orientation. */
+/* The unnatural bit-representation of inversions is for backward
+ * compatibility with the "invert=1" module parameter.            */
+#define HDAPS_ORIENT_INVERT_XY  0x01   /* Invert both X and Y axes.       */
+#define HDAPS_ORIENT_INVERT_X   0x02   /* Invert the X axis (uninvert if
+					* already inverted by INVERT_XY). */
+#define HDAPS_ORIENT_SWAP       0x04   /* Swap the axes. The swap occurs
+					* before inverting X or Y.        */
+#define HDAPS_ORIENT_MAX        0x07
+#define HDAPS_ORIENT_UNDEFINED  0xFF   /* Placeholder during initialization */
+#define HDAPS_ORIENT_INVERT_Y   (HDAPS_ORIENT_INVERT_XY | HDAPS_ORIENT_INVERT_X)
 
+static struct timer_list hdaps_timer;
 static struct platform_device *pdev;
-static struct input_polled_dev *hdaps_idev;
-static unsigned int hdaps_invert;
-static u8 km_activity;
-static int rest_x;
-static int rest_y;
-
-static DEFINE_MUTEX(hdaps_mtx);
-
-/*
- * __get_latch - Get the value from a given port.  Callers must hold hdaps_mtx.
- */
-static inline u8 __get_latch(u16 port)
-{
-	return inb(port) & 0xff;
+static struct input_dev *hdaps_idev;     /* joystick-like device with fuzz */
+static struct input_dev *hdaps_idev_raw; /* raw hdaps sensor readouts */
+static unsigned int hdaps_invert = HDAPS_ORIENT_UNDEFINED;
+static int needs_calibration;
+
+/* Configuration: */
+static int sampling_rate = 50;       /* Sampling rate  */
+static int oversampling_ratio = 5;   /* Ratio between our sampling rate and
+				      * EC accelerometer sampling rate      */
+static int running_avg_filter_order = 2; /* EC running average filter order */
+
+/* Latest state readout: */
+static int pos_x, pos_y;      /* position */
+static int temperature;       /* temperature */
+static int stale_readout = 1; /* last read invalid */
+static int rest_x, rest_y;    /* calibrated rest position */
+
+/* Last time we saw keyboard and mouse activity: */
+static u64 last_keyboard_jiffies = INITIAL_JIFFIES;
+static u64 last_mouse_jiffies = INITIAL_JIFFIES;
+static u64 last_update_jiffies = INITIAL_JIFFIES;
+
+/* input device use count */
+static int hdaps_users;
+static DEFINE_MUTEX(hdaps_users_mtx);
+
+/* Some models require an axis transformation to the standard representation */
+static void transform_axes(int *x, int *y)
+{
+	if (hdaps_invert & HDAPS_ORIENT_SWAP) {
+		int z;
+		z = *x;
+		*x = *y;
+		*y = z;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_XY) {
+		*x = -*x;
+		*y = -*y;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_X)
+		*x = -*x;
 }
 
-/*
- * __check_latch - Check a port latch for a given value.  Returns zero if the
- * port contains the given value.  Callers must hold hdaps_mtx.
+/**
+ * __hdaps_update - query current state, with locks already acquired
+ * @fast: if nonzero, do one quick attempt without retries.
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query. Caller must hold controller lock.
  */
-static inline int __check_latch(u16 port, u8 val)
+static int __hdaps_update(int fast)
 {
-	if (__get_latch(port) == val)
-		return 0;
-	return -EINVAL;
-}
+	/* Read data: */
+	struct thinkpad_ec_row data;
+	int ret;
 
-/*
- * __wait_latch - Wait up to 100us for a port latch to get a certain value,
- * returning zero if the value is obtained.  Callers must hold hdaps_mtx.
- */
-static int __wait_latch(u16 port, u8 val)
-{
-	unsigned int i;
+	data.mask = (1 << EC_ACCEL_IDX_READOUTS) | (1 << EC_ACCEL_IDX_KMACT) |
+		    (3 << EC_ACCEL_IDX_YPOS1)    | (3 << EC_ACCEL_IDX_XPOS1) |
+		    (1 << EC_ACCEL_IDX_TEMP1)    | (1 << EC_ACCEL_IDX_RETVAL);
+	if (fast)
+		ret = thinkpad_ec_try_read_row(&ec_accel_args, &data);
+	else
+		ret = thinkpad_ec_read_row(&ec_accel_args, &data);
+	thinkpad_ec_prefetch_row(&ec_accel_args); /* Prefetch even if error */
+	if (ret)
+		return ret;
 
-	for (i = 0; i < 20; i++) {
-		if (!__check_latch(port, val))
-			return 0;
-		udelay(5);
+	/* Check status: */
+	if (data.val[EC_ACCEL_IDX_RETVAL] != 0x00) {
+		pr_warn("read RETVAL=0x%02x\n",
+		       data.val[EC_ACCEL_IDX_RETVAL]);
+		return -EIO;
 	}
 
-	return -EIO;
+	if (data.val[EC_ACCEL_IDX_READOUTS] < 1)
+		return -EBUSY; /* no pending readout, try again later */
+
+	/* Parse position data: */
+	pos_x = *(s16 *)(data.val+EC_ACCEL_IDX_XPOS1);
+	pos_y = *(s16 *)(data.val+EC_ACCEL_IDX_YPOS1);
+	transform_axes(&pos_x, &pos_y);
+
+	/* Keyboard and mouse activity status is cleared as soon as it's read,
+	 * so applications will eat each other's events. Thus we remember any
+	 * event for KMACT_REMEMBER_PERIOD jiffies.
+	 */
+	if (data.val[EC_ACCEL_IDX_KMACT] & KEYBD_MASK)
+		last_keyboard_jiffies = get_jiffies_64();
+	if (data.val[EC_ACCEL_IDX_KMACT] & MOUSE_MASK)
+		last_mouse_jiffies = get_jiffies_64();
+
+	temperature = data.val[EC_ACCEL_IDX_TEMP1];
+
+	last_update_jiffies = get_jiffies_64();
+	stale_readout = 0;
+	if (needs_calibration) {
+		rest_x = pos_x;
+		rest_y = pos_y;
+		needs_calibration = 0;
+	}
+
+	return 0;
 }
 
-/*
- * __device_refresh - request a refresh from the accelerometer.  Does not wait
- * for refresh to complete.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_update - acquire locks and query current state
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query.
+ * Retries until timeout if the accelerometer is not in ready status (common).
+ * Does its own locking.
  */
-static void __device_refresh(void)
+static int hdaps_update(void)
 {
-	udelay(200);
-	if (inb(0x1604) != STATE_FRESH) {
-		outb(0x11, 0x1610);
-		outb(0x01, 0x161f);
+	u64 age = get_jiffies_64() - last_update_jiffies;
+	int total, ret;
+
+	if (!stale_readout && age < (9*HZ)/(10*sampling_rate))
+		return 0; /* already updated recently */
+	for (total = 0; total < READ_TIMEOUT_MSECS; total += RETRY_MSECS) {
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+		ret = __hdaps_update(0);
+		thinkpad_ec_unlock();
+
+		if (!ret)
+			return 0;
+		if (ret != -EBUSY)
+			break;
+		msleep(RETRY_MSECS);
 	}
+	return ret;
 }
 
-/*
- * __device_refresh_sync - request a synchronous refresh from the
- * accelerometer.  We wait for the refresh to complete.  Returns zero if
- * successful and nonzero on error.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_power - enable or disable power to the accelerometer.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int __device_refresh_sync(void)
+static int hdaps_set_power(int on)
 {
-	__device_refresh();
-	return __wait_latch(0x1604, STATE_FRESH);
+	struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x14, on?0x01:0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	return 0;
 }
 
-/*
- * __device_complete - indicate to the accelerometer that we are done reading
- * data, and then initiate an async refresh.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_ec_config - set accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * (Normally we have @ec_rate = sampling_rate * oversampling_ratio.)
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static inline void __device_complete(void)
+static int hdaps_set_ec_config(int ec_rate, int order)
 {
-	inb(0x161f);
-	inb(0x1604);
-	__device_refresh();
+	struct thinkpad_ec_row args = { .mask = 0x000F,
+		.val = {0x10, (u8)ec_rate, (u8)(ec_rate>>8), order} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	pr_debug("setting ec_rate=%d, filter_order=%d\n", ec_rate, order);
+	if (ret)
+		return ret;
+	if (data.val[0xF] == 0x03) {
+		pr_warn("config param out of range\n");
+		return -EINVAL;
+	}
+	if (data.val[0xF] == 0x06) {
+		pr_warn("config change already pending\n");
+		return -EBUSY;
+	}
+	if (data.val[0xF] != 0x00) {
+		pr_warn("config change error, ret=%d\n",
+		      data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_readb_one - reads a byte from a single I/O port, placing the value in
- * the given pointer.  Returns zero on success or a negative error on failure.
- * Can sleep.
+/**
+ * hdaps_get_ec_config - get accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_readb_one(unsigned int port, u8 *val)
+static int hdaps_get_ec_config(int *ec_rate, int *order)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	ret = __device_refresh_sync();
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x82} };
+	struct thinkpad_ec_row data = { .mask = 0x801F };
+	int ret = thinkpad_ec_read_row(&args, &data);
 	if (ret)
-		goto out;
-
-	*val = inb(port);
-	__device_complete();
-
-out:
-	mutex_unlock(&hdaps_mtx);
-	return ret;
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	if (!(data.val[0x1] & 0x01))
+		return -ENXIO; /* accelerometer polling not enabled */
+	if (data.val[0x1] & 0x02)
+		return -EBUSY; /* config change in progress, retry later */
+	*ec_rate = data.val[0x2] | ((int)(data.val[0x3]) << 8);
+	*order = data.val[0x4];
+	return 0;
 }
 
-/* __hdaps_read_pair - internal lockless helper for hdaps_read_pair(). */
-static int __hdaps_read_pair(unsigned int port1, unsigned int port2,
-			     int *x, int *y)
+/**
+ * hdaps_get_ec_mode - get EC accelerometer mode
+ * Returns zero on success and negative error code on failure.  Can sleep.
+ */
+static int hdaps_get_ec_mode(u8 *mode)
 {
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	if (__device_refresh_sync())
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0001, .val = {0x13} };
+	struct thinkpad_ec_row data = { .mask = 0x8002 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00) {
+		pr_warn("accelerometer not implemented (0x%02x)\n",
+		       data.val[0xF]);
 		return -EIO;
-
-	*y = inw(port2);
-	*x = inw(port1);
-	km_activity = inb(HDAPS_PORT_KMACT);
-	__device_complete();
-
-	/* hdaps_invert is a bitvector to negate the axes */
-	if (hdaps_invert & HDAPS_X_AXIS)
-		*x = -*x;
-	if (hdaps_invert & HDAPS_Y_AXIS)
-		*y = -*y;
-
+	}
+	*mode = data.val[0x1];
 	return 0;
 }
 
-/*
- * hdaps_read_pair - reads the values from a pair of ports, placing the values
- * in the given pointers.  Returns zero on success.  Can sleep.
+/**
+ * hdaps_check_ec - checks something about the EC.
+ * Follows the clean-room spec for HDAPS; we don't know what it means.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_read_pair(unsigned int port1, unsigned int port2,
-			   int *val1, int *val2)
+static int hdaps_check_ec(void)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-	ret = __hdaps_read_pair(port1, port2, val1, val2);
-	mutex_unlock(&hdaps_mtx);
-
-	return ret;
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x81} };
+	struct thinkpad_ec_row data = { .mask = 0x800E };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return  ret;
+	if (!((data.val[0x1] == 0x00 && data.val[0x2] == 0x60) || /* cleanroom spec */
+	      (data.val[0x1] == 0x01 && data.val[0x2] == 0x00)) || /* seen on T61 */
+	    data.val[0x3] != 0x00 || data.val[0xF] != 0x00) {
+		pr_warn("hdaps_check_ec: bad response (0x%x,0x%x,0x%x,0x%x)\n",
+		       data.val[0x1], data.val[0x2],
+		       data.val[0x3], data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_device_init - initialize the accelerometer.  Returns zero on success
- * and negative error code on failure.  Can sleep.
+/**
+ * hdaps_device_init - initialize the accelerometer.
+ *
+ * Call several embedded controller functions to test and initialize the
+ * accelerometer.
+ * Returns zero on success and negative error code on failure. Can sleep.
  */
+#define FAILED_INIT(msg) pr_err("init failed at: %s\n", msg)
 static int hdaps_device_init(void)
 {
-	int total, ret = -ENXIO;
+	int ret;
+	u8 mode;
 
-	mutex_lock(&hdaps_mtx);
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
 
-	outb(0x13, 0x1610);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
+	if (hdaps_get_ec_mode(&mode))
+		{ FAILED_INIT("hdaps_get_ec_mode failed"); goto bad; }
 
-	/*
-	 * Most ThinkPads return 0x01.
-	 *
-	 * Others--namely the R50p, T41p, and T42p--return 0x03.  These laptops
-	 * have "inverted" axises.
-	 *
-	 * The 0x02 value occurs when the chip has been previously initialized.
-	 */
-	if (__check_latch(0x1611, 0x03) &&
-		     __check_latch(0x1611, 0x02) &&
-		     __check_latch(0x1611, 0x01))
-		goto out;
+	pr_debug("initial mode latch is 0x%02x\n", mode);
+	if (mode == 0x00)
+		{ FAILED_INIT("accelerometer not available"); goto bad; }
 
-	printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n",
-	       __get_latch(0x1611));
+	if (hdaps_check_ec())
+		{ FAILED_INIT("hdaps_check_ec failed"); goto bad; }
 
-	outb(0x17, 0x1610);
-	outb(0x81, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
-	if (__wait_latch(0x1612, 0x60))
-		goto out;
-	if (__wait_latch(0x1613, 0x00))
-		goto out;
-	outb(0x14, 0x1610);
-	outb(0x01, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	outb(0x10, 0x1610);
-	outb(0xc8, 0x1611);
-	outb(0x00, 0x1612);
-	outb(0x02, 0x1613);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__device_refresh_sync())
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
+	if (hdaps_set_power(1))
+		{ FAILED_INIT("hdaps_set_power failed"); goto bad; }
 
-	/* we have done our dance, now let's wait for the applause */
-	for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
-		int x, y;
-
-		/* a read of the device helps push it into action */
-		__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
-		if (!__wait_latch(0x1611, 0x02)) {
-			ret = 0;
-			break;
-		}
+	if (hdaps_set_ec_config(sampling_rate*oversampling_ratio,
+				running_avg_filter_order))
+		{ FAILED_INIT("hdaps_set_ec_config failed"); goto bad; }
 
-		msleep(INIT_WAIT_MSECS);
-	}
+	thinkpad_ec_invalidate();
+	udelay(200);
 
-out:
-	mutex_unlock(&hdaps_mtx);
+	/* Just prefetch instead of reading, to avoid ~1sec delay on load */
+	ret = thinkpad_ec_prefetch_row(&ec_accel_args);
+	if (ret)
+		{ FAILED_INIT("initial prefetch failed"); goto bad; }
+	goto good;
+bad:
+	thinkpad_ec_invalidate();
+	ret = -ENXIO;
+good:
+	stale_readout = 1;
+	thinkpad_ec_unlock();
 	return ret;
 }
 
+/**
+ * hdaps_device_shutdown - power off the accelerometer
+ * Returns nonzero on failure. Can sleep.
+ */
+static int hdaps_device_shutdown(void)
+{
+	int ret;
+	ret = hdaps_set_power(0);
+	if (ret) {
+		pr_warn("cannot power off\n");
+		return ret;
+	}
+	ret = hdaps_set_ec_config(0, 1);
+	if (ret)
+		pr_warn("cannot stop EC sampling\n");
+	return ret;
+}
 
 /* Device model stuff */
 
@@ -306,13 +424,29 @@
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int hdaps_suspend(struct device *dev)
+{
+	/* Don't do hdaps polls until resume re-initializes the sensor. */
+	del_timer_sync(&hdaps_timer);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
+	return 0;
+}
+
 static int hdaps_resume(struct device *dev)
 {
-	return hdaps_device_init();
+	int ret = hdaps_device_init();
+	if (ret)
+		return ret;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users)
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(hdaps_pm, NULL, hdaps_resume);
+static SIMPLE_DEV_PM_OPS(hdaps_pm, hdaps_suspend, hdaps_resume);
 
 static struct platform_driver hdaps_driver = {
 	.probe = hdaps_probe,
@@ -322,30 +456,47 @@
 	},
 };
 
-/*
- * hdaps_calibrate - Set our "resting" values.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_calibrate - set our "resting" values.
+ * Does its own locking.
  */
 static void hdaps_calibrate(void)
 {
-	__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y);
+	needs_calibration = 1;
+	hdaps_update();
+	/* If that fails, the mousedev poll will take care of things later. */
 }
 
-static void hdaps_mousedev_poll(struct input_polled_dev *dev)
+/* Timer handler for updating the input device. Runs in softirq context,
+ * so avoid lengthy or blocking operations.
+ */
+static void hdaps_mousedev_poll(unsigned long unused)
 {
-	struct input_dev *input_dev = dev->input;
-	int x, y;
-
-	mutex_lock(&hdaps_mtx);
+	int ret;
 
-	if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y))
-		goto out;
+	stale_readout = 1;
 
-	input_report_abs(input_dev, ABS_X, x - rest_x);
-	input_report_abs(input_dev, ABS_Y, y - rest_y);
-	input_sync(input_dev);
+	/* Cannot sleep.  Try nonblockingly.  If we fail, try again later. */
+	if (thinkpad_ec_try_lock())
+		goto keep_active;
+
+	ret = __hdaps_update(1); /* fast update, we're in softirq context */
+	thinkpad_ec_unlock();
+	/* Any of "successful", "not yet ready" and "not prefetched"? */
+	if (ret != 0 && ret != -EBUSY && ret != -ENODATA) {
+		pr_err("poll failed, disabling updates\n");
+		return;
+	}
 
-out:
-	mutex_unlock(&hdaps_mtx);
+keep_active:
+	/* Even if we failed now, pos_x,y may have been updated earlier: */
+	input_report_abs(hdaps_idev, ABS_X, pos_x - rest_x);
+	input_report_abs(hdaps_idev, ABS_Y, pos_y - rest_y);
+	input_sync(hdaps_idev);
+	input_report_abs(hdaps_idev_raw, ABS_X, pos_x);
+	input_report_abs(hdaps_idev_raw, ABS_Y, pos_y);
+	input_sync(hdaps_idev_raw);
+	mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
 }
 
 
@@ -354,65 +505,41 @@
 static ssize_t hdaps_position_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
-}
-
-static ssize_t hdaps_variance_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XVAR, HDAPS_PORT_YVAR, &x, &y);
+	int ret = hdaps_update();
 	if (ret)
 		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
+	return sprintf(buf, "(%d,%d)\n", pos_x, pos_y);
 }
 
 static ssize_t hdaps_temp1_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	u8 uninitialized_var(temp);
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP1, &temp);
+	int ret = hdaps_update();
 	if (ret)
 		return ret;
-
-	return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t hdaps_temp2_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	u8 uninitialized_var(temp);
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP2, &temp);
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "%u\n", temp);
+	return sprintf(buf, "%d\n", temperature);
 }
 
 static ssize_t hdaps_keyboard_activity_show(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
 {
-	return sprintf(buf, "%u\n", KEYBD_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_keyboard_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_mouse_activity_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	return sprintf(buf, "%u\n", MOUSE_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_mouse_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_calibrate_show(struct device *dev,
@@ -425,10 +552,7 @@
 				     struct device_attribute *attr,
 				     const char *buf, size_t count)
 {
-	mutex_lock(&hdaps_mtx);
 	hdaps_calibrate();
-	mutex_unlock(&hdaps_mtx);
-
 	return count;
 }
 
@@ -445,7 +569,7 @@
 	int invert;
 
 	if (sscanf(buf, "%d", &invert) != 1 ||
-	    invert < 0 || invert > HDAPS_BOTH_AXES)
+	    invert < 0 || invert > HDAPS_ORIENT_MAX)
 		return -EINVAL;
 
 	hdaps_invert = invert;
@@ -454,24 +578,128 @@
 	return count;
 }
 
+static ssize_t hdaps_sampling_rate_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", sampling_rate);
+}
+
+static ssize_t hdaps_sampling_rate_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int rate, ret;
+	if (sscanf(buf, "%d", &rate) != 1 || rate > HZ || rate <= 0) {
+		pr_warn("must have 0<input_sampling_rate<=HZ=%d\n", HZ);
+		return -EINVAL;
+	}
+	ret = hdaps_set_ec_config(rate*oversampling_ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	sampling_rate = rate;
+	return count;
+}
+
+static ssize_t hdaps_oversampling_ratio_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ec_rate, order;
+	int ret = hdaps_get_ec_config(&ec_rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", ec_rate / sampling_rate);
+}
+
+static ssize_t hdaps_oversampling_ratio_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int ratio, ret;
+	if (sscanf(buf, "%d", &ratio) != 1 || ratio < 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	oversampling_ratio = ratio;
+	return count;
+}
+
+static ssize_t hdaps_running_avg_filter_order_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int rate, order;
+	int ret = hdaps_get_ec_config(&rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", order);
+}
+
+static ssize_t hdaps_running_avg_filter_order_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int order, ret;
+	if (sscanf(buf, "%d", &order) != 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*oversampling_ratio, order);
+	if (ret)
+		return ret;
+	running_avg_filter_order = order;
+	return count;
+}
+
+static int hdaps_mousedev_open(struct input_dev *dev)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users++ == 0) /* first input user */
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
+}
+
+static void hdaps_mousedev_close(struct input_dev *dev)
+{
+	mutex_lock(&hdaps_users_mtx);
+	if (--hdaps_users == 0) /* no input users left */
+		del_timer_sync(&hdaps_timer);
+	mutex_unlock(&hdaps_users_mtx);
+
+	module_put(THIS_MODULE);
+}
+
 static DEVICE_ATTR(position, 0444, hdaps_position_show, NULL);
-static DEVICE_ATTR(variance, 0444, hdaps_variance_show, NULL);
 static DEVICE_ATTR(temp1, 0444, hdaps_temp1_show, NULL);
-static DEVICE_ATTR(temp2, 0444, hdaps_temp2_show, NULL);
-static DEVICE_ATTR(keyboard_activity, 0444, hdaps_keyboard_activity_show, NULL);
+  /* "temp1" instead of "temperature" is hwmon convention */
+static DEVICE_ATTR(keyboard_activity, 0444,
+		   hdaps_keyboard_activity_show, NULL);
 static DEVICE_ATTR(mouse_activity, 0444, hdaps_mouse_activity_show, NULL);
-static DEVICE_ATTR(calibrate, 0644, hdaps_calibrate_show,hdaps_calibrate_store);
+static DEVICE_ATTR(calibrate, 0644,
+		   hdaps_calibrate_show, hdaps_calibrate_store);
 static DEVICE_ATTR(invert, 0644, hdaps_invert_show, hdaps_invert_store);
+static DEVICE_ATTR(sampling_rate, 0644,
+		   hdaps_sampling_rate_show, hdaps_sampling_rate_store);
+static DEVICE_ATTR(oversampling_ratio, 0644,
+		   hdaps_oversampling_ratio_show,
+		   hdaps_oversampling_ratio_store);
+static DEVICE_ATTR(running_avg_filter_order, 0644,
+		   hdaps_running_avg_filter_order_show,
+		   hdaps_running_avg_filter_order_store);
 
 static struct attribute *hdaps_attributes[] = {
 	&dev_attr_position.attr,
-	&dev_attr_variance.attr,
 	&dev_attr_temp1.attr,
-	&dev_attr_temp2.attr,
 	&dev_attr_keyboard_activity.attr,
 	&dev_attr_mouse_activity.attr,
 	&dev_attr_calibrate.attr,
 	&dev_attr_invert.attr,
+	&dev_attr_sampling_rate.attr,
+	&dev_attr_oversampling_ratio.attr,
+	&dev_attr_running_avg_filter_order.attr,
 	NULL,
 };
 
@@ -482,84 +710,77 @@
 
 /* Module stuff */
 
-/* hdaps_dmi_match - found a match.  return one, short-circuiting the hunt. */
-static int __init hdaps_dmi_match(const struct dmi_system_id *id)
-{
-	pr_info("%s detected\n", id->ident);
-	return 1;
-}
-
 /* hdaps_dmi_match_invert - found an inverted match. */
 static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id)
 {
-	hdaps_invert = (unsigned long)id->driver_data;
-	pr_info("inverting axis (%u) readings\n", hdaps_invert);
-	return hdaps_dmi_match(id);
+	unsigned int orient = (kernel_ulong_t) id->driver_data;
+	hdaps_invert = orient;
+	pr_info("%s detected, setting orientation %u\n", id->ident, orient);
+	return 1; /* stop enumeration */
 }
 
-#define HDAPS_DMI_MATCH_INVERT(vendor, model, axes) {	\
+#define HDAPS_DMI_MATCH_INVERT(vendor, model, orient) { \
 	.ident = vendor " " model,			\
 	.callback = hdaps_dmi_match_invert,		\
-	.driver_data = (void *)axes,			\
+	.driver_data = (void *)(orient),		\
 	.matches = {					\
 		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
 		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
 	}						\
 }
 
-#define HDAPS_DMI_MATCH_NORMAL(vendor, model)		\
-	HDAPS_DMI_MATCH_INVERT(vendor, model, 0)
-
-/* Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match
-   "ThinkPad T42p", so the order of the entries matters.
-   If your ThinkPad is not recognized, please update to latest
-   BIOS. This is especially the case for some R52 ThinkPads. */
-static struct dmi_system_id __initdata hdaps_whitelist[] = {
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R50"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R51"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R52"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61i", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T41"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T42"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T43"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X40"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_Y_AXIS),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61s", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad Z60m"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61m", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61p", HDAPS_BOTH_AXES),
+/* List of models with abnormal axis configuration.
+   Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match
+   "ThinkPad T42p", and enumeration stops after first match,
+   so the order of the entries matters. */
+static struct dmi_system_id __initdata hdaps_whitelist[] = {
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X40", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R400", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R500", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60 Tablet", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60s", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400s", HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410s", HDAPS_ORIENT_SWAP),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T500", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T510", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad W51O", HDAPS_ORIENT_MAX),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201 Tablet", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X220", HDAPS_ORIENT_SWAP),
 	{ .ident = NULL }
 };
 
 static int __init hdaps_init(void)
 {
-	struct input_dev *idev;
 	int ret;
 
-	if (!dmi_check_system(hdaps_whitelist)) {
-		pr_warn("supported laptop not found!\n");
-		ret = -ENODEV;
-		goto out;
-	}
-
-	if (!request_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS, "hdaps")) {
-		ret = -ENXIO;
-		goto out;
-	}
-
+	/* Determine axis orientation */
+	if (hdaps_invert == HDAPS_ORIENT_UNDEFINED) /* set by module param? */
+		if (dmi_check_system(hdaps_whitelist) < 1) /* in whitelist? */
+			hdaps_invert = 0; /* default */
+
+	/* Init timer before platform_driver_register, in case of suspend */
+	init_timer(&hdaps_timer);
+	hdaps_timer.function = hdaps_mousedev_poll;
 	ret = platform_driver_register(&hdaps_driver);
 	if (ret)
-		goto out_region;
+		goto out;
 
 	pdev = platform_device_register_simple("hdaps", -1, NULL, 0);
 	if (IS_ERR(pdev)) {
@@ -571,47 +792,79 @@
 	if (ret)
 		goto out_device;
 
-	hdaps_idev = input_allocate_polled_device();
+	hdaps_idev = input_allocate_device();
 	if (!hdaps_idev) {
 		ret = -ENOMEM;
 		goto out_group;
 	}
 
-	hdaps_idev->poll = hdaps_mousedev_poll;
-	hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL;
+	hdaps_idev_raw = input_allocate_device();
+	if (!hdaps_idev_raw) {
+		ret = -ENOMEM;
+		goto out_idev_first;
+	}
 
-	/* initial calibrate for the input device */
-	hdaps_calibrate();
+	/* calibration for the input device (deferred to avoid delay) */
+	needs_calibration = 1;
 
-	/* initialize the input class */
-	idev = hdaps_idev->input;
-	idev->name = "hdaps";
-	idev->phys = "isa1600/input0";
-	idev->id.bustype = BUS_ISA;
-	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT_MASK(EV_ABS);
-	input_set_abs_params(idev, ABS_X,
+	/* initialize the joystick-like fuzzed input device */
+	hdaps_idev->name = "ThinkPad HDAPS joystick emulation";
+	hdaps_idev->phys = "hdaps/input0";
+	hdaps_idev->id.bustype = BUS_HOST;
+	hdaps_idev->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev->id.version = HDAPS_INPUT_JS_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev->evbit[0] = BIT(EV_ABS);
+	hdaps_idev->open = hdaps_mousedev_open;
+	hdaps_idev->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
-	input_set_abs_params(idev, ABS_Y,
+	input_set_abs_params(hdaps_idev, ABS_Y,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 
-	ret = input_register_polled_device(hdaps_idev);
+	ret = input_register_device(hdaps_idev);
 	if (ret)
 		goto out_idev;
 
-	pr_info("driver successfully loaded\n");
+	/* initialize the raw data input device */
+	hdaps_idev_raw->name = "ThinkPad HDAPS accelerometer data";
+	hdaps_idev_raw->phys = "hdaps/input1";
+	hdaps_idev_raw->id.bustype = BUS_HOST;
+	hdaps_idev_raw->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev_raw->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev_raw->id.version = HDAPS_INPUT_RAW_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev_raw->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev_raw->evbit[0] = BIT(EV_ABS);
+	hdaps_idev_raw->open = hdaps_mousedev_open;
+	hdaps_idev_raw->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev_raw, ABS_X, -32768, 32767, 0, 0);
+	input_set_abs_params(hdaps_idev_raw, ABS_Y, -32768, 32767, 0, 0);
+
+	ret = input_register_device(hdaps_idev_raw);
+	if (ret)
+		goto out_idev_reg_first;
+
+	pr_info("driver successfully loaded.\n");
 	return 0;
 
+out_idev_reg_first:
+	input_unregister_device(hdaps_idev);
 out_idev:
-	input_free_polled_device(hdaps_idev);
+	input_free_device(hdaps_idev_raw);
+out_idev_first:
+	input_free_device(hdaps_idev);
 out_group:
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 out_device:
 	platform_device_unregister(pdev);
 out_driver:
 	platform_driver_unregister(&hdaps_driver);
-out_region:
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
+	hdaps_device_shutdown();
 out:
 	pr_warn("driver init failed (ret=%d)!\n", ret);
 	return ret;
@@ -619,12 +872,12 @@
 
 static void __exit hdaps_exit(void)
 {
-	input_unregister_polled_device(hdaps_idev);
-	input_free_polled_device(hdaps_idev);
+	input_unregister_device(hdaps_idev_raw);
+	input_unregister_device(hdaps_idev);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 	platform_device_unregister(pdev);
 	platform_driver_unregister(&hdaps_driver);
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
 
 	pr_info("driver unloaded\n");
 }
@@ -632,9 +885,8 @@
 module_init(hdaps_init);
 module_exit(hdaps_exit);
 
-module_param_named(invert, hdaps_invert, int, 0);
-MODULE_PARM_DESC(invert, "invert data along each axis. 1 invert x-axis, "
-		 "2 invert y-axis, 3 invert both axes.");
+module_param_named(invert, hdaps_invert, uint, 0);
+MODULE_PARM_DESC(invert, "axis orientation code");
 
 MODULE_AUTHOR("Robert Love");
 MODULE_DESCRIPTION("IBM Hard Drive Active Protection System (HDAPS) driver");
diff -Nur linux-4.2/drivers/platform/x86/thinkpad_ec.c linux-4.2-pck/drivers/platform/x86/thinkpad_ec.c
--- linux-4.2/drivers/platform/x86/thinkpad_ec.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/drivers/platform/x86/thinkpad_ec.c	2015-09-08 00:48:22.225030705 -0300
@@ -0,0 +1,513 @@
+/*
+ *  thinkpad_ec.c - ThinkPad embedded controller LPC3 functions
+ *
+ *  The embedded controller on ThinkPad laptops has a non-standard interface,
+ *  where LPC channel 3 of the H8S EC chip is hooked up to IO ports
+ *  0x1600-0x161F and implements (a special case of) the H8S LPC protocol.
+ *  The EC LPC interface provides various system management services (currently
+ *  known: battery information and accelerometer readouts). This driver
+ *  provides access and mutual exclusion for the EC interface.
+ *
+ *  The LPC protocol and terminology are documented here:
+ *  "H8S/2104B Group Hardware Manual",
+ *  http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
+ *
+ *  Copyright (C) 2006-2007 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/jiffies.h>
+#include <asm/io.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+	#include <asm/semaphore.h>
+#else
+	#include <linux/semaphore.h>
+#endif
+
+#define TP_VERSION "0.41"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION("ThinkPad embedded controller hardware access");
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+/* IO ports used by embedded controller LPC channel 3: */
+#define TPC_BASE_PORT 0x1600
+#define TPC_NUM_PORTS 0x20
+#define TPC_STR3_PORT 0x1604  /* Reads H8S EC register STR3 */
+#define TPC_TWR0_PORT  0x1610 /* Mapped to H8S EC register TWR0MW/SW  */
+#define TPC_TWR15_PORT 0x161F /* Mapped to H8S EC register TWR15. */
+  /* (and port TPC_TWR0_PORT+i is mapped to H8S reg TWRi for 0<i<16) */
+
+/* H8S STR3 status flags (see "H8S/2104B Group Hardware Manual" p.549) */
+#define H8S_STR3_IBF3B 0x80  /* Bidi. Data Register Input Buffer Full */
+#define H8S_STR3_OBF3B 0x40  /* Bidi. Data Register Output Buffer Full */
+#define H8S_STR3_MWMF  0x20  /* Master Write Mode Flag */
+#define H8S_STR3_SWMF  0x10  /* Slave Write Mode Flag */
+#define H8S_STR3_MASK  0xF0  /* All bits we care about in STR3 */
+
+/* Timeouts and retries */
+#define TPC_READ_RETRIES     150
+#define TPC_READ_NDELAY      500
+#define TPC_REQUEST_RETRIES 1000
+#define TPC_REQUEST_NDELAY    10
+#define TPC_PREFETCH_TIMEOUT   (HZ/10)  /* invalidate prefetch after 0.1sec */
+
+/* A few macros for printk()ing: */
+#define MSG_FMT(fmt, args...) \
+  "thinkpad_ec: %s: " fmt "\n", __func__, ## args
+#define REQ_FMT(msg, code) \
+  MSG_FMT("%s: (0x%02x:0x%02x)->0x%02x", \
+	  msg, args->val[0x0], args->val[0xF], code)
+
+/* State of request prefetching: */
+static u8 prefetch_arg0, prefetch_argF;           /* Args of last prefetch */
+static u64 prefetch_jiffies;                      /* time of prefetch, or: */
+#define TPC_PREFETCH_NONE   INITIAL_JIFFIES       /*   No prefetch */
+#define TPC_PREFETCH_JUNK   (INITIAL_JIFFIES+1)   /*   Ignore prefetch */
+
+/* Locking: */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static DECLARE_MUTEX(thinkpad_ec_mutex);
+#else
+static DEFINE_SEMAPHORE(thinkpad_ec_mutex);
+#endif
+
+/* Kludge in case the ACPI DSDT reserves the ports we need. */
+static bool force_io;    /* Willing to do IO to ports we couldn't reserve? */
+static int reserved_io; /* Successfully reserved the ports? */
+module_param_named(force_io, force_io, bool, 0600);
+MODULE_PARM_DESC(force_io, "Force IO even if region already reserved (0=off, 1=on)");
+
+/**
+ * thinkpad_ec_lock - get lock on the ThinkPad EC
+ *
+ * Get exclusive lock for accessing the ThinkPad embedded controller LPC3
+ * interface. Returns 0 iff lock acquired.
+ */
+int thinkpad_ec_lock(void)
+{
+	int ret;
+	ret = down_interruptible(&thinkpad_ec_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_lock);
+
+/**
+ * thinkpad_ec_try_lock - try getting lock on the ThinkPad EC
+ *
+ * Try getting an exclusive lock for accessing the ThinkPad embedded
+ * controller LPC3. Returns immediately if lock is not available; neither
+ * blocks nor sleeps. Returns 0 iff lock acquired.
+ */
+int thinkpad_ec_try_lock(void)
+{
+	return down_trylock(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_lock);
+
+/**
+ * thinkpad_ec_unlock - release lock on ThinkPad EC
+ *
+ * Release a previously acquired exclusive lock on the ThinkPad embedded
+ * controller LPC3 interface.
+ */
+void thinkpad_ec_unlock(void)
+{
+	up(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_unlock);
+
+/**
+ * thinkpad_ec_request_row - tell embedded controller to prepare a row
+ * @args Input register arguments
+ *
+ * Requests a data row by writing to H8S LPC registers TWR0 through TWR15 (or
+ * a subset thereof) following the protocol prescribed by the "H8S/2104B Group
+ * Hardware Manual". Does sanity checks via status register STR3.
+ */
+static int thinkpad_ec_request_row(const struct thinkpad_ec_row *args)
+{
+	u8 str3;
+	int i;
+
+	/* EC protocol requires write to TWR0 (function code): */
+	if (!(args->mask & 0x0001)) {
+		printk(KERN_ERR MSG_FMT("bad args->mask=0x%02x", args->mask));
+		return -EINVAL;
+	}
+
+	/* Check initial STR3 status: */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B) { /* data already pending */
+		inb(TPC_TWR15_PORT); /* marks end of previous transaction */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC has result from unrequested transaction",
+			       str3));
+		return -EBUSY; /* EC will be ready in a few usecs */
+	} else if (str3 == H8S_STR3_SWMF) { /* busy with previous request */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC is busy with unrequested transaction",
+			       str3));
+		return -EBUSY; /* data will be pending in a few usecs */
+	} else if (str3 != 0x00) { /* unexpected status? */
+		printk(KERN_WARNING REQ_FMT("unexpected initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Send TWR0MW: */
+	outb(args->val[0], TPC_TWR0_PORT);
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 != H8S_STR3_MWMF) { /* not accepted? */
+		printk(KERN_WARNING REQ_FMT("arg0 rejected", str3));
+		return -EIO;
+	}
+
+	/* Send TWR1 through TWR14: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((args->mask>>i)&1)
+			outb(args->val[i], TPC_TWR0_PORT+i);
+
+	/* Send TWR15 (default to 0x01). This marks end of command. */
+	outb((args->mask & 0x8000) ? args->val[0xF] : 0x01, TPC_TWR15_PORT);
+
+	/* Wait until EC starts writing its reply (~60ns on average).
+	 * Releasing locks before this happens may cause an EC hang
+	 * due to firmware bug!
+	 */
+	for (i = 0; i < TPC_REQUEST_RETRIES; i++) {
+		str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+		if (str3 & H8S_STR3_SWMF) /* EC started replying */
+			return 0;
+		else if (!(str3 & ~(H8S_STR3_IBF3B|H8S_STR3_MWMF)))
+			/* Normal progress (the EC hasn't seen the request
+			 * yet, or is processing it). Wait it out. */
+			ndelay(TPC_REQUEST_NDELAY);
+		else { /* weird EC status */
+			printk(KERN_WARNING
+			       REQ_FMT("bad end STR3", str3));
+			return -EIO;
+		}
+	}
+	printk(KERN_WARNING REQ_FMT("EC is mysteriously silent", str3));
+	return -EIO;
+}
+
+/**
+ * thinkpad_ec_read_data - read pre-requested row-data from EC
+ * @args Input register arguments of pre-requested rows
+ * @data Output register values
+ *
+ * Reads current row data from the controller, assuming it's already
+ * requested. Follows the H8S spec for register access and status checks.
+ */
+static int thinkpad_ec_read_data(const struct thinkpad_ec_row *args,
+				 struct thinkpad_ec_row *data)
+{
+	int i;
+	u8 str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	/* Once we make a request, STR3 assumes the sequence of values listed
+	 * in the following 'if' as it reads the request and writes its data.
+	 * This takes a few dozen nanoseconds in total, with very high variance.
+	 */
+	if (str3 == (H8S_STR3_IBF3B|H8S_STR3_MWMF) ||
+	    str3 == 0x00 ||  /* the 0x00 is indistinguishable from idle EC! */
+	    str3 == H8S_STR3_SWMF)
+		return -EBUSY; /* not ready yet */
+	/* Finally, the EC signals output buffer full: */
+	if (str3 != (H8S_STR3_OBF3B|H8S_STR3_SWMF)) {
+		printk(KERN_WARNING
+		       REQ_FMT("bad initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Read first byte (signals start of read transactions): */
+	data->val[0] = inb(TPC_TWR0_PORT);
+	/* Optionally read 14 more bytes: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((data->mask >> i)&1)
+			data->val[i] = inb(TPC_TWR0_PORT+i);
+	/* Read last byte from 0x161F (signals end of read transaction): */
+	data->val[0xF] = inb(TPC_TWR15_PORT);
+
+	/* Readout still pending? */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B)
+		printk(KERN_WARNING
+		       REQ_FMT("OBF3B=1 after read", str3));
+	/* If port 0x161F returns 0x80 too often, the EC may lock up. Warn: */
+	if (data->val[0xF] == 0x80)
+		printk(KERN_WARNING
+		       REQ_FMT("0x161F reports error", data->val[0xF]));
+	return 0;
+}
+
+/**
+ * thinkpad_ec_is_row_fetched - is the given row currently prefetched?
+ *
+ * To keep things simple we compare only the first and last args;
+ * this suffices for all known cases.
+ */
+static int thinkpad_ec_is_row_fetched(const struct thinkpad_ec_row *args)
+{
+	return (prefetch_jiffies != TPC_PREFETCH_NONE) &&
+	       (prefetch_jiffies != TPC_PREFETCH_JUNK) &&
+	       (prefetch_arg0 == args->val[0]) &&
+	       (prefetch_argF == args->val[0xF]) &&
+	       (get_jiffies_64() < prefetch_jiffies + TPC_PREFETCH_TIMEOUT);
+}
+
+/**
+ * thinkpad_ec_read_row - request and read data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Read a data row from the ThinkPad embedded controller LPC3 interface.
+ * Does fetching and retrying if needed. The row is specified by an
+ * array of 16 bytes, some of which may be undefined (but the first is
+ * mandatory). These bytes are given in @args->val[], where @args->val[i] is
+ * used iff ((@args->mask>>i)&1). The resulting row data is stored in
+ * @data->val[], but is only guaranteed to be valid for indices corresponding
+ * to set bits in @data->mask. That is, if @data->mask&(1<<i)==0 then
+ * @data->val[i] is undefined.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+			 struct thinkpad_ec_row *data)
+{
+	int retries, ret;
+
+	if (thinkpad_ec_is_row_fetched(args))
+		goto read_row; /* already requested */
+
+	/* Request the row */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_request_row(args);
+		if (!ret)
+			goto read_row;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+	printk(KERN_ERR REQ_FMT("failed requesting row", ret));
+	goto out;
+
+read_row:
+	/* Read the row's data */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			goto out;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+
+	printk(KERN_ERR REQ_FMT("failed waiting for data", ret));
+
+out:
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_read_row);
+
+/**
+ * thinkpad_ec_try_read_row - try reading prefetched data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Try reading a data row from the ThinkPad embedded controller LPC3
+ * interface, if this row was recently prefetched using
+ * thinkpad_ec_prefetch_row(). Does not fetch, retry or block.
+ * The parameters have the same meaning as in thinkpad_ec_read_row().
+ *
+ * Returns -EBUSY if data is not ready and -ENODATA if the row was not prefetched.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+			     struct thinkpad_ec_row *data)
+{
+	int ret;
+	if (!thinkpad_ec_is_row_fetched(args)) {
+		ret = -ENODATA;
+	} else {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			prefetch_jiffies = TPC_PREFETCH_NONE; /* eaten up */
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_read_row);
+
+/**
+ * thinkpad_ec_prefetch_row - prefetch data from ThinkPad EC
+ * @args Input register arguments
+ *
+ * Prefetch a data row from the ThinkPad embedded controller LPC3
+ * interface. A subsequent call to thinkpad_ec_read_row() with the
+ * same arguments will be faster, and a subsequent call to
+ * thinkpad_ec_try_read_row() stands a good chance of succeeding if
+ * done neither too soon nor too late. See
+ * thinkpad_ec_read_row() for the meaning of @args.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args)
+{
+	int ret;
+	ret = thinkpad_ec_request_row(args);
+	if (ret) {
+		prefetch_jiffies = TPC_PREFETCH_JUNK;
+	} else {
+		prefetch_jiffies = get_jiffies_64();
+		prefetch_arg0 = args->val[0x0];
+		prefetch_argF = args->val[0xF];
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_prefetch_row);
+
+/**
+ * thinkpad_ec_invalidate - invalidate prefetched ThinkPad EC data
+ *
+ * Invalidate the data prefetched via thinkpad_ec_prefetch_row() from the
+ * ThinkPad embedded controller LPC3 interface.
+ * Must be called before unlocking by any code that accesses the controller
+ * ports directly.
+ */
+void thinkpad_ec_invalidate(void)
+{
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_invalidate);
+
+
+/*** Checking for EC hardware ***/
+
+/**
+ * thinkpad_ec_test - verify the EC is present and follows protocol
+ *
+ * Ensure the EC LPC3 channel really works on this machine by making
+ * an EC request and seeing if the EC follows the documented H8S protocol.
+ * The requested row just reads battery status, so it should be harmless to
+ * access it (on a correct EC).
+ * This test writes to IO ports, so execute only after checking DMI.
+ */
+static int __init thinkpad_ec_test(void)
+{
+	int ret;
+	const struct thinkpad_ec_row args = /* battery 0 basic status */
+	  { .mask = 0x8001, .val = {0x01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x0000 };
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
+	ret = thinkpad_ec_read_row(&args, &data);
+	thinkpad_ec_unlock();
+	return ret;
+}
+
+/* Search all DMI device names of a given type for a substring */
+static int __init dmi_find_substring(int type, const char *substr)
+{
+	const struct dmi_device *dev = NULL;
+	while ((dev = dmi_find_device(type, NULL, dev))) {
+		if (strstr(dev->name, substr))
+			return 1;
+	}
+	return 0;
+}
+
+#define TP_DMI_MATCH(vendor,model)	{		\
+	.ident = vendor " " model,			\
+	.matches = {					\
+		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
+		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
+	}						\
+}
+
+/* Check DMI for existence of ThinkPad embedded controller */
+static int __init check_dmi_for_ec(void)
+{
+	/* A few old models that have a good EC but don't report it in DMI */
+	struct dmi_system_id tp_whitelist[] = {
+		TP_DMI_MATCH("IBM", "ThinkPad A30"),
+		TP_DMI_MATCH("IBM", "ThinkPad T23"),
+		TP_DMI_MATCH("IBM", "ThinkPad X24"),
+		TP_DMI_MATCH("LENOVO", "ThinkPad"),
+		{ .ident = NULL }
+	};
+	return dmi_find_substring(DMI_DEV_TYPE_OEM_STRING,
+				  "IBM ThinkPad Embedded Controller") ||
+	       dmi_check_system(tp_whitelist);
+}
+
+/*** Init and cleanup ***/
+
+static int __init thinkpad_ec_init(void)
+{
+	if (!check_dmi_for_ec()) {
+		printk(KERN_WARNING
+		       "thinkpad_ec: no ThinkPad embedded controller!\n");
+		return -ENODEV;
+	}
+
+	if (request_region(TPC_BASE_PORT, TPC_NUM_PORTS, "thinkpad_ec")) {
+		reserved_io = 1;
+	} else {
+		printk(KERN_ERR "thinkpad_ec: cannot claim IO ports %#x-%#x... ",
+		       TPC_BASE_PORT,
+		       TPC_BASE_PORT + TPC_NUM_PORTS - 1);
+		if (force_io) {
+			printk("forcing use of unreserved IO ports.\n");
+		} else {
+			printk("consider using force_io=1.\n");
+			return -ENXIO;
+		}
+	}
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	if (thinkpad_ec_test()) {
+		printk(KERN_ERR "thinkpad_ec: initial ec test failed\n");
+		if (reserved_io)
+			release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+		return -ENXIO;
+	}
+	printk(KERN_INFO "thinkpad_ec: thinkpad_ec " TP_VERSION " loaded.\n");
+	return 0;
+}
+
+static void __exit thinkpad_ec_exit(void)
+{
+	if (reserved_io)
+		release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+	printk(KERN_INFO "thinkpad_ec: unloaded.\n");
+}
+
+module_init(thinkpad_ec_init);
+module_exit(thinkpad_ec_exit);
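For client code such as hdaps or tp_smapi, the calling convention of this interface is: take the lock, build a thinkpad_ec_row whose mask selects the argument bytes, read the row, and release the lock. The sketch below uses only the exported functions and the struct layout visible in this patch (thinkpad_ec_lock/read_row/unlock, .mask, .val[], and the battery 0 basic-status row 0x01 that thinkpad_ec_test() also uses); the meaning of the individual returned bytes is battery-specific and is shown here only as an illustration.

#include <linux/module.h>
#include <linux/thinkpad_ec.h>

/* Illustrative out-of-tree client of the thinkpad_ec API introduced above. */
static int __init ec_client_init(void)
{
	const struct thinkpad_ec_row args =
		{ .mask = 0x0001, .val = {0x01} };	/* battery 0 basic status */
	struct thinkpad_ec_row data = { .mask = 0xFFFF };	/* want all 16 bytes */
	int ret;

	ret = thinkpad_ec_lock();
	if (ret)
		return ret;
	ret = thinkpad_ec_read_row(&args, &data);
	thinkpad_ec_unlock();
	if (ret)
		return ret;

	/* val[0xF] is the command return value by the conventions used above;
	 * interpretation of the other bytes depends on the requested row. */
	pr_info("ec_client: val[0x1]=0x%02x retval=0x%02x\n",
		data.val[0x1], data.val[0xF]);
	return 0;
}

static void __exit ec_client_exit(void)
{
}

module_init(ec_client_init);
module_exit(ec_client_exit);
MODULE_LICENSE("GPL");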
diff -Nur linux-4.2/drivers/platform/x86/tp_smapi.c linux-4.2-pck/drivers/platform/x86/tp_smapi.c
--- linux-4.2/drivers/platform/x86/tp_smapi.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/drivers/platform/x86/tp_smapi.c	2015-09-08 00:48:22.225030705 -0300
@@ -0,0 +1,1493 @@
+/*
+ *  tp_smapi.c - ThinkPad SMAPI support
+ *
+ *  This driver exposes some features of the System Management Application
+ *  Program Interface (SMAPI) BIOS found on ThinkPad laptops. It works on
+ *  models in which the SMAPI BIOS runs in SMM and is invoked by writing
+ *  to the APM control port 0xB2.
+ *  It also exposes battery status information, obtained from the ThinkPad
+ *  embedded controller (via the thinkpad_ec module).
+ *  Ancient ThinkPad models use a different interface, supported by the
+ *  "thinkpad" module from "tpctl".
+ *
+ *  Many of the battery status values obtained from the EC simply mirror
+ *  values provided by the battery's Smart Battery System (SBS) interface, so
+ *  their meaning is defined by the Smart Battery Data Specification (see
+ *  http://sbs-forum.org/specs/sbdat110.pdf). References to this SBS spec
+ *  are given in the code where relevant.
+ *
+ *  Copyright (C) 2006 Shem Multinymous <multinymous@gmail.com>.
+ *  SMAPI access code based on the mwave driver by Mike Sullivan.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/mc146818rtc.h>	/* CMOS defines */
+#include <linux/delay.h>
+#include <linux/version.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/platform_device.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define TP_VERSION "0.41"
+#define TP_DESC "ThinkPad SMAPI Support"
+#define TP_DIR "smapi"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION(TP_DESC);
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+static struct platform_device *pdev;
+
+static int tp_debug;
+module_param_named(debug, tp_debug, int, 0600);
+MODULE_PARM_DESC(debug, "Debug level (0=off, 1=on)");
+
+/* A few macros for printk()ing: */
+#define TPRINTK(level, fmt, args...) \
+  dev_printk(level, &(pdev->dev), "%s: " fmt "\n", __func__, ## args)
+#define DPRINTK(fmt, args...) \
+  do { if (tp_debug) TPRINTK(KERN_DEBUG, fmt, ## args); } while (0)
+
+/*********************************************************************
+ * SMAPI interface
+ */
+
+/* SMAPI functions (register BX when making the SMM call). */
+#define SMAPI_GET_INHIBIT_CHARGE                0x2114
+#define SMAPI_SET_INHIBIT_CHARGE                0x2115
+#define SMAPI_GET_THRESH_START                  0x2116
+#define SMAPI_SET_THRESH_START                  0x2117
+#define SMAPI_GET_FORCE_DISCHARGE               0x2118
+#define SMAPI_SET_FORCE_DISCHARGE               0x2119
+#define SMAPI_GET_THRESH_STOP                   0x211a
+#define SMAPI_SET_THRESH_STOP                   0x211b
+
+/* SMAPI error codes (see ThinkPad 770 Technical Reference Manual p.83 at
+ http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD) */
+#define SMAPI_RETCODE_EOF 0xff
+static struct { u8 rc; char *msg; int ret; } smapi_retcode[] =
+{
+	{0x00, "OK", 0},
+	{0x53, "SMAPI function is not available", -ENXIO},
+	{0x81, "Invalid parameter", -EINVAL},
+	{0x86, "Function is not supported by SMAPI BIOS", -EOPNOTSUPP},
+	{0x90, "System error", -EIO},
+	{0x91, "System is invalid", -EIO},
+	{0x92, "System is busy", -EBUSY},
+	{0xa0, "Device error (disk read error)", -EIO},
+	{0xa1, "Device is busy", -EBUSY},
+	{0xa2, "Device is not attached", -ENXIO},
+	{0xa3, "Device is disabled", -EIO},
+	{0xa4, "Request parameter is out of range", -EINVAL},
+	{0xa5, "Request parameter is not accepted", -EINVAL},
+	{0xa6, "Transient error", -EBUSY}, /* ? */
+	{SMAPI_RETCODE_EOF, "Unknown error code", -EIO}
+};
+
+
+#define SMAPI_MAX_RETRIES 10
+#define SMAPI_PORT2 0x4F           /* fixed port, meaning unclear */
+static unsigned short smapi_port;  /* APM control port, normally 0xB2 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static DECLARE_MUTEX(smapi_mutex);
+#else
+static DEFINE_SEMAPHORE(smapi_mutex);
+#endif
+
+/**
+ * find_smapi_port - read SMAPI port from NVRAM
+ */
+static int __init find_smapi_port(void)
+{
+	u16 smapi_id = 0;
+	unsigned short port = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	smapi_id = CMOS_READ(0x7C);
+	smapi_id |= (CMOS_READ(0x7D) << 8);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	if (smapi_id != 0x5349) {
+		printk(KERN_ERR "SMAPI not supported (ID=0x%x)\n", smapi_id);
+		return -ENXIO;
+	}
+	spin_lock_irqsave(&rtc_lock, flags);
+	port = CMOS_READ(0x7E);
+	port |= (CMOS_READ(0x7F) << 8);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	if (port == 0) {
+		printk(KERN_ERR "unable to read SMAPI port number\n");
+		return -ENXIO;
+	}
+	return port;
+}
+
+/**
+ * smapi_request - make a SMAPI call
+ * @inEBX, @inECX, @inEDI, @inESI: input registers
+ * @outEBX, @outECX, @outEDX, @outEDI, @outESI: output registers
+ * @msg: textual error message
+ * Invokes the SMAPI SMBIOS with the given input and output args.
+ * All outputs are optional (can be %NULL).
+ * Returns 0 when successful, and a negative errno constant
+ * (see smapi_retcode above) upon failure.
+ */
+static int smapi_request(u32 inEBX, u32 inECX,
+			 u32 inEDI, u32 inESI,
+			 u32 *outEBX, u32 *outECX, u32 *outEDX,
+			 u32 *outEDI, u32 *outESI, const char **msg)
+{
+	int ret = 0;
+	int i;
+	int retries;
+	u8 rc;
+	/* Must use local vars for output regs, due to reg pressure. */
+	u32 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI;
+
+	for (retries = 0; retries < SMAPI_MAX_RETRIES; ++retries) {
+		DPRINTK("req_in: BX=%x CX=%x DI=%x SI=%x",
+			inEBX, inECX, inEDI, inESI);
+
+		/* SMAPI's SMBIOS call and thinkpad_ec end up using
+		 * different interfaces to the same chip, so play it safe. */
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+
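+		/* Calling convention used below: EAX is loaded with the fixed
+		 * value 0x00005380, the SMAPI function code goes in EBX and
+		 * its arguments in ECX/EDI/ESI; writing AL to the APM control
+		 * port (smapi_port) triggers the SMI, and the return code
+		 * comes back in AH (decoded via the smapi_retcode table). */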
+		__asm__ __volatile__(
+			"movl  $0x00005380,%%eax\n\t"
+			"movl  %6,%%ebx\n\t"
+			"movl  %7,%%ecx\n\t"
+			"movl  %8,%%edi\n\t"
+			"movl  %9,%%esi\n\t"
+			"xorl  %%edx,%%edx\n\t"
+			"movw  %10,%%dx\n\t"
+			"out   %%al,%%dx\n\t"  /* trigger SMI to SMBIOS */
+			"out   %%al,$0x4F\n\t"
+			"movl  %%eax,%0\n\t"
+			"movl  %%ebx,%1\n\t"
+			"movl  %%ecx,%2\n\t"
+			"movl  %%edx,%3\n\t"
+			"movl  %%edi,%4\n\t"
+			"movl  %%esi,%5\n\t"
+			:"=m"(tmpEAX),
+			 "=m"(tmpEBX),
+			 "=m"(tmpECX),
+			 "=m"(tmpEDX),
+			 "=m"(tmpEDI),
+			 "=m"(tmpESI)
+			:"m"(inEBX), "m"(inECX), "m"(inEDI), "m"(inESI),
+			 "m"((u16)smapi_port)
+			:"%eax", "%ebx", "%ecx", "%edx", "%edi",
+			 "%esi");
+
+		thinkpad_ec_invalidate();
+		thinkpad_ec_unlock();
+
+		/* Don't let the next SMAPI access happen too quickly;
+		 * it may cause problems. (We're holding smapi_mutex.)  */
+		msleep(50);
+
+		if (outEBX) *outEBX = tmpEBX;
+		if (outECX) *outECX = tmpECX;
+		if (outEDX) *outEDX = tmpEDX;
+		if (outESI) *outESI = tmpESI;
+		if (outEDI) *outEDI = tmpEDI;
+
+		/* Look up error code */
+		rc = (tmpEAX>>8)&0xFF;
+		for (i = 0; smapi_retcode[i].rc != SMAPI_RETCODE_EOF &&
+			    smapi_retcode[i].rc != rc; ++i) {}
+		ret = smapi_retcode[i].ret;
+		if (msg)
+			*msg = smapi_retcode[i].msg;
+
+		DPRINTK("req_out: AX=%x BX=%x CX=%x DX=%x DI=%x SI=%x r=%d",
+			 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI, ret);
+		if (ret)
+			TPRINTK(KERN_NOTICE, "SMAPI error: %s (func=%x)",
+				smapi_retcode[i].msg, inEBX);
+
+		if (ret != -EBUSY)
+			return ret;
+	}
+	return ret;
+}
+
+/* Convenience wrapper: discard output arguments */
+static int smapi_write(u32 inEBX, u32 inECX,
+		       u32 inEDI, u32 inESI, const char **msg)
+{
+	return smapi_request(inEBX, inECX, inEDI, inESI,
+			     NULL, NULL, NULL, NULL, NULL, msg);
+}
+
+
+/*********************************************************************
+ * Specific SMAPI services
+ * All of these functions return 0 upon success, and a negative errno
+ * constant (see smapi_retcode) on failure.
+ */
+
+enum thresh_type {
+	THRESH_STOP  = 0, /* the code assumes this is 0 for brevity */
+	THRESH_START
+};
+#define THRESH_NAME(which) ((which == THRESH_START) ? "start" : "stop")
+
+/**
+ * __get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passes as-is to SMAPI)
+ * @outEDI: some additional state that needs to be preserved, meaning unknown
+ * @outESI: some additional state that needs to be preserved, meaning unknown
+ */
+static int __get_real_thresh(int bat, enum thresh_type which, int *thresh,
+			     u32 *outEDI, u32 *outESI)
+{
+	u32 ebx = (which == THRESH_START) ? SMAPI_GET_THRESH_START
+					  : SMAPI_GET_THRESH_STOP;
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(ebx, ecx, 0, 0, NULL,
+				&ecx, NULL, outEDI, outESI, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: %s",
+			THRESH_NAME(which), bat, msg);
+		return ret;
+	}
+	if (!(ecx&0x00000100)) {
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: ecx=0%x",
+			THRESH_NAME(which), bat, ecx);
+		return -EIO;
+	}
+	if (thresh)
+		*thresh = ecx&0xFF;
+	return 0;
+}
+
+/**
+ * get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passes as-is to SMAPI)
+ */
+static int get_real_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	return __get_real_thresh(bat, which, thresh, NULL, NULL);
+}
+
+/**
+ * set_real_thresh - write battery start/stop charge threshold to SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passes as-is to SMAPI)
+ */
+static int set_real_thresh(int bat, enum thresh_type which, int thresh)
+{
+	u32 ebx = (which == THRESH_START) ? SMAPI_SET_THRESH_START
+					  : SMAPI_SET_THRESH_STOP;
+	u32 ecx = ((bat+1)<<8) + thresh;
+	u32 getDI, getSI;
+	const char *msg;
+	int ret;
+
+	/* verify read before writing */
+	ret = __get_real_thresh(bat, which, NULL, &getDI, &getSI);
+	if (ret)
+		return ret;
+
+	ret = smapi_write(ebx, ecx, getDI, getSI, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE, "set %s to %d for bat=%d failed: %s",
+			THRESH_NAME(which), thresh, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set %s to %d for bat=%d",
+			THRESH_NAME(which), thresh, bat);
+	return ret;
+}
+
+/**
+ * __get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ * @outECX: some additional state that needs to be preserved, meaning unknown
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int __get_inhibit_charge_minutes(int bat, int *minutes, u32 *outECX)
+{
+	u32 ecx = (bat+1)<<8;
+	u32 esi;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_INHIBIT_CHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, &esi, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	if (!(ecx&0x0100)) {
+		TPRINTK(KERN_NOTICE, "bad ecx=0x%x for bat=%d", ecx, bat);
+		return -EIO;
+	}
+	if (minutes)
+		*minutes = (ecx&0x0001)?esi:0;
+	if (outECX)
+		*outECX = ecx;
+	return 0;
+}
+
+/**
+ * get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int get_inhibit_charge_minutes(int bat, int *minutes)
+{
+	return __get_inhibit_charge_minutes(bat, minutes, NULL);
+}
+
+/**
+ * set_inhibit_charge_minutes - write inhibit charge period to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ */
+static int set_inhibit_charge_minutes(int bat, int minutes)
+{
+	u32 ecx;
+	const char *msg;
+	int ret;
+
+	/* verify read before writing */
+	ret = __get_inhibit_charge_minutes(bat, NULL, &ecx);
+	if (ret)
+		return ret;
+
+	ecx = ((bat+1)<<8) | (ecx&0x00FE) | (minutes > 0 ? 0x0001 : 0x0000);
+	if (minutes > 0xFFFF)
+		minutes = 0xFFFF;
+	ret = smapi_write(SMAPI_SET_INHIBIT_CHARGE, ecx, 0, minutes, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE,
+			"set to %d failed for bat=%d: %s", minutes, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set to %d for bat=%d\n", minutes, bat);
+	return ret;
+}
+
+
+/**
+ * get_force_discharge - get status of forced discharging from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: 1 if forced discharge is enabled, 0 if not
+ */
+static int get_force_discharge(int bat, int *enabled)
+{
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, NULL, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	*enabled = (!(ecx&0x00000100) && (ecx&0x00000001))?1:0;
+	return 0;
+}
+
+/**
+ * set_force_discharge - write status of forced discharging to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: 1 if forced discharge is enabled, 0 if not
+ */
+static int set_force_discharge(int bat, int enabled)
+{
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, NULL, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "get failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	if (ecx&0x00000100) {
+		TPRINTK(KERN_NOTICE, "cannot force discharge bat=%d", bat);
+		return -EIO;
+	}
+
+	ecx = ((bat+1)<<8) | (ecx&0x000000FA) | (enabled?0x00000001:0);
+	ret = smapi_write(SMAPI_SET_FORCE_DISCHARGE, ecx, 0, 0, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE, "set to %d failed for bat=%d: %s",
+			enabled, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set to %d for bat=%d", enabled, bat);
+	return ret;
+}
+
+
+/*********************************************************************
+ * Wrappers to threshold-related SMAPI functions, which handle default
+ * thresholds and related quirks.
+ */
+
+/* Minimum, default and minimum difference for battery charging thresholds: */
+#define MIN_THRESH_DELTA      4  /* Min delta between start and stop thresh */
+#define MIN_THRESH_START      2
+#define MAX_THRESH_START      (100-MIN_THRESH_DELTA)
+#define MIN_THRESH_STOP       (MIN_THRESH_START + MIN_THRESH_DELTA)
+#define MAX_THRESH_STOP       100
+#define DEFAULT_THRESH_START  MAX_THRESH_START
+#define DEFAULT_THRESH_STOP   MAX_THRESH_STOP
+
+/* The GUI of IBM's Battery Maximizer seems to show a start threshold that
+ * is 1 more than the value we set/get via SMAPI. Since the threshold is
+ * maintained across reboot, this can be confusing. So we kludge our
+ * interface for interoperability: */
+#define BATMAX_FIX   1
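+/* Example: a raw SMAPI start threshold of 95 is shown to userspace as 96,
+ * and a userspace value of 96 is written back to SMAPI as 95 (stop
+ * thresholds are not adjusted). */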
+
+/* Get charge start/stop threshold (1..100),
+ * substituting default values if needed and applying BATMAX_FIX. */
+static int get_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	int ret = get_real_thresh(bat, which, thresh);
+	if (ret)
+		return ret;
+	if (*thresh == 0)
+		*thresh = (which == THRESH_START) ? DEFAULT_THRESH_START
+						  : DEFAULT_THRESH_STOP;
+	else if (which == THRESH_START)
+		*thresh += BATMAX_FIX;
+	return 0;
+}
+
+
+/* Set charge start/stop threshold (1..100),
+ * substituting default values if needed and applying BATMAX_FIX. */
+static int set_thresh(int bat, enum thresh_type which, int thresh)
+{
+	if (which == THRESH_STOP && thresh == DEFAULT_THRESH_STOP)
+		thresh = 0; /* 100 is out of range, but default means 100 */
+	if (which == THRESH_START)
+		thresh -= BATMAX_FIX;
+	return set_real_thresh(bat, which, thresh);
+}
+
+/*********************************************************************
+ * ThinkPad embedded controller readout and basic functions
+ */
+
+/**
+ * read_tp_ec_row - read data row from the ThinkPad embedded controller
+ * @arg0: EC command code
+ * @bat: battery number, 0 or 1
+ * @j: the byte value to be used for "junk" (unused) input/outputs
+ * @dataval: result vector
+ */
+static int read_tp_ec_row(u8 arg0, int bat, u8 j, u8 *dataval)
+{
+	int ret;
+	const struct thinkpad_ec_row args = { .mask = 0xFFFF,
+		.val = {arg0, j,j,j,j,j,j,j,j,j,j,j,j,j,j, (u8)bat} };
+	struct thinkpad_ec_row data = { .mask = 0xFFFF };
+
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
+	ret = thinkpad_ec_read_row(&args, &data);
+	thinkpad_ec_unlock();
+	memcpy(dataval, &data.val, TP_CONTROLLER_ROW_LEN);
+	return ret;
+}
+
+/**
+ * power_device_present - check for presence of battery or AC power
+ * @bat: 0 for battery 0, 1 for battery 1, otherwise AC power
+ * Returns 1 if present, 0 if not present, negative if error.
+ */
+static int power_device_present(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u8 test;
+	int ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	switch (bat) {
+	case 0:  test = 0x40; break; /* battery 0 */
+	case 1:  test = 0x20; break; /* battery 1 */
+	default: test = 0x80;        /* AC power */
+	}
+	return (row[0] & test) ? 1 : 0;
+}
+
+/**
+ * bat_has_status - check if battery can report detailed status
+ * @bat: 0 for battery 0, 1 for battery 1
+ * Returns 1 if yes, 0 if no, negative if error.
+ */
+static int bat_has_status(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	if ((row[0] & (bat?0x20:0x40)) == 0) /* no battery */
+		return 0;
+	if ((row[1] & (0x60)) == 0) /* no status */
+		return 0;
+	return 1;
+}
+
+/**
+ * get_tp_ec_bat_16 - read a 16-bit value from EC battery status data
+ * @arg0: first argument to EC
+ * @off: offset in row returned from EC
+ * @bat: battery (0 or 1)
+ * @val: the 16-bit value obtained
+ * Returns nonzero on error.
+ */
+static int get_tp_ec_bat_16(u8 arg0, int offset, int bat, u16 *val)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret;
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+	*val = *(u16 *)(row+offset);
+	return 0;
+}
+
+/*********************************************************************
+ * sysfs attributes for batteries -
+ * definitions and helper functions
+ */
+
+/* A custom device attribute struct which holds a battery number */
+struct bat_device_attribute {
+	struct device_attribute dev_attr;
+	int bat;
+};
+
+/**
+ * attr_get_bat - get the battery to which the attribute belongs
+ */
+static int attr_get_bat(struct device_attribute *attr)
+{
+	return container_of(attr, struct bat_device_attribute, dev_attr)->bat;
+}
+
+/**
+ * show_tp_ec_bat_u16 - show an unsigned 16-bit battery attribute
+ * @arg0: first argument to the EC (selects the data row to read)
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @na_msg: string to output if the value is not available (0xFFFF)
+ * @attr: battery attribute
+ * @buf: output buffer
+ * The 16-bit value is read from the EC, treated as unsigned,
+ * transformed as x->mul*x, and printed to the buffer.
+ * If the value is 0xFFFF and na_msg!=%NULL, na_msg is printed instead.
+ */
+static ssize_t show_tp_ec_bat_u16(u8 arg0, int offset, int mul,
+			      const char *na_msg,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 val;
+	int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val);
+	if (ret)
+		return ret;
+	if (na_msg && val == 0xFFFF)
+		return sprintf(buf, "%s\n", na_msg);
+	else
+		return sprintf(buf, "%u\n", mul*(unsigned int)val);
+}
+
+/**
+ * show_tp_ec_bat_s16 - show a signed 16-bit battery attribute
+ * @arg0: first argument to the EC (selects the data row to read)
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @add: correction term to add after multiplication
+ * @attr: battery attribute
+ * @buf: output buffer
+ * The 16-bit value is read from the EC, treated as signed,
+ * transformed as x->mul*x+add, and printed to the buffer.
+ */
+static ssize_t show_tp_ec_bat_s16(u8 arg0, int offset, int mul, int add,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 val;
+	int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", mul*(s16)val+add);
+}
+
+/**
+ * show_tp_ec_bat_str - show a string from EC battery status data
+ * @arg0: first argument to the EC (selects the data row to read)
+ * @offset: byte offset in EC raw data
+ * @maxlen: maximum string length
+ * @attr: battery attribute
+ * @buf: output buffer
+ */
+static ssize_t show_tp_ec_bat_str(u8 arg0, int offset, int maxlen,
+			      struct device_attribute *attr, char *buf)
+{
+	int bat = attr_get_bat(attr);
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret;
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+	strncpy(buf, (char *)row+offset, maxlen);
+	buf[maxlen] = 0;
+	strcat(buf, "\n");
+	return strlen(buf);
+}
+
+/**
+ * show_tp_ec_bat_power - show a power readout from EC battery status data
+ * @arg0: first argument to the EC (selects the data row to read)
+ * @offV: byte offset of voltage in EC raw data
+ * @offI: byte offset of current in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ * Computes the power as current*voltage from the two given readout offsets.
+ */
+static ssize_t show_tp_ec_bat_power(u8 arg0, int offV, int offI,
+				struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int milliamp, millivolt, ret;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	millivolt = *(u16 *)(row+offV);
+	milliamp = *(s16 *)(row+offI);
+	return sprintf(buf, "%d\n", milliamp*millivolt/1000); /* units: mW */
+}
+
+/**
+ * show_tp_ec_bat_date - decode and show a date from EC battery status data
+ * @arg0: first argument to the EC (selects the data row to read)
+ * @offset: byte offset in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ */
+static ssize_t show_tp_ec_bat_date(u8 arg0, int offset,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u16 v;
+	int ret;
+	int day, month, year;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+
+	/* Decode bit-packed: v = day | (month<<5) | ((year-1980)<<9) */
+	v = *(u16 *)(row+offset);
+	day = v & 0x1F;
+	month = (v >> 5) & 0xF;
+	year = (v >> 9) + 1980;
+
+	return sprintf(buf, "%04d-%02d-%02d\n", year, month, day);
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O for batteries -
+ * the actual attribute show/store functions
+ */
+
+static ssize_t show_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int thresh;
+	int bat = attr_get_bat(attr);
+	int ret = get_thresh(bat, THRESH_START, &thresh);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", thresh);  /* units: percent */
+}
+
+static ssize_t show_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int thresh;
+	int bat = attr_get_bat(attr);
+	int ret = get_thresh(bat, THRESH_STOP, &thresh);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", thresh);  /* units: percent */
+}
+
+/**
+ * store_battery_start_charge_thresh - store battery_start_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware. We do this by clamping the requested threshold to the
+ * valid range and, if necessary, moving the other threshold so that
+ * it's MIN_THRESH_DELTA away from this one.
+ */
+static ssize_t store_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_START) /* clamp up to MIN_THRESH_START */
+		thresh = MIN_THRESH_START;
+	if (thresh > MAX_THRESH_START) /* clamp down to MAX_THRESH_START */
+		thresh = MAX_THRESH_START;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_STOP, &other_thresh);
+	if (ret != -EOPNOTSUPP && ret != -ENXIO) {
+		if (ret) /* other threshold is set? */
+			goto out;
+		ret = get_real_thresh(bat, THRESH_START, NULL);
+		if (ret) /* this threshold is set? */
+			goto out;
+		if (other_thresh < thresh+MIN_THRESH_DELTA) {
+			/* move other thresh to keep it above this one */
+			ret = set_thresh(bat, THRESH_STOP,
+					 thresh+MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_START, thresh);
+out:
+	up(&smapi_mutex);
+	return ret ? ret : count;
+
+}
+
+/**
+ * store_battery_stop_charge_thresh - store battery_stop_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware. We do this by clamping the requested threshold to the
+ * valid range and, if necessary, moving the other threshold so that
+ * it's MIN_THRESH_DELTA away from this one.
+ */
+static ssize_t store_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_STOP) /* clamp up to MIN_THRESH_STOP */
+		thresh = MIN_THRESH_STOP;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_START, &other_thresh);
+	if (ret != -EOPNOTSUPP && ret != -ENXIO) { /* other threshold exists? */
+		if (ret)
+			goto out;
+		/* this threshold exists? */
+		ret = get_real_thresh(bat, THRESH_STOP, NULL);
+		if (ret)
+			goto out;
+		if (other_thresh >= thresh-MIN_THRESH_DELTA) {
+			 /* move other thresh to be below this one */
+			ret = set_thresh(bat, THRESH_START,
+					 thresh-MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_STOP, thresh);
+out:
+	up(&smapi_mutex);
+	return ret ? ret : count;
+}
+
+static ssize_t show_battery_inhibit_charge_minutes(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int minutes;
+	int bat = attr_get_bat(attr);
+	int ret = get_inhibit_charge_minutes(bat, &minutes);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", minutes);  /* units: minutes */
+}
+
+static ssize_t store_battery_inhibit_charge_minutes(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int ret;
+	int minutes;
+	int bat = attr_get_bat(attr);
+	if (sscanf(buf, "%d", &minutes) != 1 || minutes < 0) {
+		TPRINTK(KERN_ERR, "inhibit_charge_minutes: "
+			      "must be a non-negative integer");
+		return -EINVAL;
+	}
+	ret = set_inhibit_charge_minutes(bat, minutes);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t show_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int enabled;
+	int bat = attr_get_bat(attr);
+	int ret = get_force_discharge(bat, &enabled);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", enabled);  /* type: boolean */
+}
+
+static ssize_t store_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	int enabled;
+	int bat = attr_get_bat(attr);
+	if (sscanf(buf, "%d", &enabled) != 1 || enabled < 0 || enabled > 1)
+		return -EINVAL;
+	ret = set_force_discharge(bat, enabled);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t show_battery_installed(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int bat = attr_get_bat(attr);
+	int ret = power_device_present(bat);
+	if (ret < 0)
+		return ret;
+	return sprintf(buf, "%d\n", ret); /* type: boolean */
+}
+
+static ssize_t show_battery_state(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	const char *txt;
+	int ret;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return sprintf(buf, "none\n");
+	ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	switch (row[1] & 0xf0) {
+	case 0xc0: txt = "idle"; break;
+	case 0xd0: txt = "discharging"; break;
+	case 0xe0: txt = "charging"; break;
+	default:   return sprintf(buf, "unknown (0x%x)\n", row[1]);
+	}
+	return sprintf(buf, "%s\n", txt);  /* type: string from fixed set */
+}
+
+static ssize_t show_battery_manufacturer(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34: ManufacturerName() */
+	return show_tp_ec_bat_str(4, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_model(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34: DeviceName() */
+	return show_tp_ec_bat_str(5, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_barcoding(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string */
+	return show_tp_ec_bat_str(7, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_chemistry(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34-35: DeviceChemistry() */
+	return show_tp_ec_bat_str(6, 2, 5, attr, buf);
+}
+
+static ssize_t show_battery_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p24: Voltage() */
+	return show_tp_ec_bat_u16(1, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_design_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p32: DesignVoltage() */
+	return show_tp_ec_bat_u16(3, 4, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p37,39: ChargingVoltage() */
+	return show_tp_ec_bat_u16(9, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group0_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group1_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 10, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group2_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group3_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_current_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p24: Current() */
+	return show_tp_ec_bat_s16(1, 8, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_current_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p24: AverageCurrent() */
+	return show_tp_ec_bat_s16(1, 10, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_current(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p36,38: ChargingCurrent() */
+	return show_tp_ec_bat_s16(9, 6, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_power_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW. SBS spec v1.1: Voltage()*Current() */
+	return show_tp_ec_bat_power(1, 6, 8, attr, buf);
+}
+
+static ssize_t show_battery_power_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW. SBS spec v1.1: Voltage()*AverageCurrent() */
+	return show_tp_ec_bat_power(1, 6, 10, attr, buf);
+}
+
+static ssize_t show_battery_remaining_percent(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: percent. SBS spec v1.1 p25: RelativeStateOfCharge() */
+	return show_tp_ec_bat_u16(1, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_remaining_percent_error(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: percent. SBS spec v1.1 p25: MaxError() */
+	return show_tp_ec_bat_u16(9, 4, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_remaining_charging_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: AverageTimeToFull() */
+	return show_tp_ec_bat_u16(2, 8, 1, "not_charging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 6, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 4, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26. */
+	return show_tp_ec_bat_u16(1, 14, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_last_full_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26: FullChargeCapacity() */
+	return show_tp_ec_bat_u16(2, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_design_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p32: DesignCapacity() */
+	return show_tp_ec_bat_u16(3, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_cycle_count(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: ordinal. SBS spec v1.1 p32: CycleCount() */
+	return show_tp_ec_bat_u16(2, 12, 1, "", attr, buf);
+}
+
+static ssize_t show_battery_temperature(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: millicelsius. SBS spec v1.1: Temperature()*10 */
+	return show_tp_ec_bat_s16(1, 4, 100, -273100, attr, buf);
+}
+
+static ssize_t show_battery_serial(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: int. SBS spec v1.1 p34: SerialNumber() */
+	return show_tp_ec_bat_u16(3, 10, 1, "", attr, buf);
+}
+
+static ssize_t show_battery_manufacture_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD. SBS spec v1.1 p34: ManufactureDate() */
+	return show_tp_ec_bat_date(3, 8, attr, buf);
+}
+
+static ssize_t show_battery_first_use_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD */
+	return show_tp_ec_bat_date(8, 2, attr, buf);
+}
+
+/**
+ * show_battery_dump - show the battery's dump attribute
+ * The dump attribute gives a hex dump of all EC readouts related to a
+ * battery. Some of the enumerated values don't really exist (i.e., the
+ * EC function just leaves them untouched); we use a kludge to detect and
+ * denote these.
+ */
+#define MIN_DUMP_ARG0 0x00
+#define MAX_DUMP_ARG0 0x0a /* 0x0b is useful too but hangs old EC firmware */
+static ssize_t show_battery_dump(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int i;
+	char *p = buf;
+	int bat = attr_get_bat(attr);
+	u8 arg0; /* first argument to EC */
+	u8 rowa[TP_CONTROLLER_ROW_LEN],
+	   rowb[TP_CONTROLLER_ROW_LEN];
+	const u8 junka = 0xAA,
+		 junkb = 0x55; /* junk values for testing changes */
+	int ret;
+
+	for (arg0 = MIN_DUMP_ARG0; arg0 <= MAX_DUMP_ARG0; ++arg0) {
+		if ((p-buf) > PAGE_SIZE-TP_CONTROLLER_ROW_LEN*5)
+			return -ENOMEM; /* don't overflow sysfs buf */
+		/* Read raw twice with different junk values,
+		 * to detect unused output bytes which are left unchanged: */
+		ret = read_tp_ec_row(arg0, bat, junka, rowa);
+		if (ret)
+			return ret;
+		ret = read_tp_ec_row(arg0, bat, junkb, rowb);
+		if (ret)
+			return ret;
+		for (i = 0; i < TP_CONTROLLER_ROW_LEN; i++) {
+			if (rowa[i] == junka && rowb[i] == junkb)
+				p += sprintf(p, "-- "); /* unused by EC */
+			else
+				p += sprintf(p, "%02x ", rowa[i]);
+		}
+		p += sprintf(p, "\n");
+	}
+	return p-buf;
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O, other than batteries
+ */
+
+static ssize_t show_ac_connected(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ret = power_device_present(0xFF);
+	if (ret < 0)
+		return ret;
+	return sprintf(buf, "%d\n", ret);  /* type: boolean */
+}
+
+/*********************************************************************
+ * The the "smapi_request" sysfs attribute executes a raw SMAPI call.
+ * You write to make a request and read to get the result. The state
+ * is saved globally rather than per fd (sysfs limitation), so
+ * simultaneous requests may get each other's results! So this is for
+ * development and debugging only.
+ */
+#define MAX_SMAPI_ATTR_ANSWER_LEN   128
+static char smapi_attr_answer[MAX_SMAPI_ATTR_ANSWER_LEN] = "";
+
+static ssize_t show_smapi_request(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int ret = snprintf(buf, PAGE_SIZE, "%s", smapi_attr_answer);
+	smapi_attr_answer[0] = '\0';
+	return ret;
+}
+
+static ssize_t store_smapi_request(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned int inEBX, inECX, inEDI, inESI;
+	u32 outEBX, outECX, outEDX, outEDI, outESI;
+	const char *msg;
+	int ret;
+	if (sscanf(buf, "%x %x %x %x", &inEBX, &inECX, &inEDI, &inESI) != 4) {
+		smapi_attr_answer[0] = '\0';
+		return -EINVAL;
+	}
+	ret = smapi_request(
+		   inEBX, inECX, inEDI, inESI,
+		   &outEBX, &outECX, &outEDX, &outEDI, &outESI, &msg);
+	snprintf(smapi_attr_answer, MAX_SMAPI_ATTR_ANSWER_LEN,
+		 "%x %x %x %x %x %d '%s'\n",
+		 (unsigned int)outEBX, (unsigned int)outECX,
+		 (unsigned int)outEDX, (unsigned int)outEDI,
+		 (unsigned int)outESI, ret, msg);
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+/*********************************************************************
+ * Power management: the embedded controller forgets the battery
+ * thresholds when the system is suspended to disk and unplugged from
+ * AC and battery, so we restore it upon resume.
+ */
+
+static int saved_threshs[4] = {-1, -1, -1, -1};  /* -1 = don't know */
+
+static int tp_suspend(struct platform_device *dev, pm_message_t state)
+{
+	int restore = (state.event == PM_EVENT_HIBERNATE ||
+	               state.event == PM_EVENT_FREEZE);
+	if (!restore || get_real_thresh(0, THRESH_STOP , &saved_threshs[0]))
+		saved_threshs[0] = -1;
+	if (!restore || get_real_thresh(0, THRESH_START, &saved_threshs[1]))
+		saved_threshs[1] = -1;
+	if (!restore || get_real_thresh(1, THRESH_STOP , &saved_threshs[2]))
+		saved_threshs[2] = -1;
+	if (!restore || get_real_thresh(1, THRESH_START, &saved_threshs[3]))
+		saved_threshs[3] = -1;
+	DPRINTK("suspend saved: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	return 0;
+}
+
+static int tp_resume(struct platform_device *dev)
+{
+	DPRINTK("resume restoring: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	if (saved_threshs[0] >= 0)
+		set_real_thresh(0, THRESH_STOP , saved_threshs[0]);
+	if (saved_threshs[1] >= 0)
+		set_real_thresh(0, THRESH_START, saved_threshs[1]);
+	if (saved_threshs[2] >= 0)
+		set_real_thresh(1, THRESH_STOP , saved_threshs[2]);
+	if (saved_threshs[3] >= 0)
+		set_real_thresh(1, THRESH_START, saved_threshs[3]);
+	return 0;
+}
+
+
+/*********************************************************************
+ * Driver model
+ */
+
+static struct platform_driver tp_driver = {
+	.suspend = tp_suspend,
+	.resume = tp_resume,
+	.driver = {
+		.name = "smapi",
+		.owner = THIS_MODULE
+	},
+};
+
+
+/*********************************************************************
+ * Sysfs device model
+ */
+
+/* Attributes in /sys/devices/platform/smapi/ */
+
+static DEVICE_ATTR(ac_connected, 0444, show_ac_connected, NULL);
+static DEVICE_ATTR(smapi_request, 0600, show_smapi_request,
+					store_smapi_request);
+
+static struct attribute *tp_root_attributes[] = {
+	&dev_attr_ac_connected.attr,
+	&dev_attr_smapi_request.attr,
+	NULL
+};
+static struct attribute_group tp_root_attribute_group = {
+	.attrs = tp_root_attributes
+};
+
+/* Attributes under /sys/devices/platform/smapi/BAT{0,1}/ :
+ * Every attribute needs to be defined (i.e., statically allocated) for
+ * each battery, and then referenced in the attribute list of each battery.
+ * We use preprocessor voodoo to avoid duplicating the list of attributes 4
+ * times. The preprocessor output is just normal sysfs attributes code.
+ */
+
+/**
+ * FOREACH_BAT_ATTR - invoke the given macros on all our battery attributes
+ * @_BAT:     battery number (0 or 1)
+ * @_ATTR_RW: macro to invoke for each read/write attribute
+ * @_ATTR_R:  macro to invoke for each read-only  attribute
+ */
+#define FOREACH_BAT_ATTR(_BAT, _ATTR_RW, _ATTR_R) \
+	_ATTR_RW(_BAT, start_charge_thresh) \
+	_ATTR_RW(_BAT, stop_charge_thresh) \
+	_ATTR_RW(_BAT, inhibit_charge_minutes) \
+	_ATTR_RW(_BAT, force_discharge) \
+	_ATTR_R(_BAT, installed) \
+	_ATTR_R(_BAT, state) \
+	_ATTR_R(_BAT, manufacturer) \
+	_ATTR_R(_BAT, model) \
+	_ATTR_R(_BAT, barcoding) \
+	_ATTR_R(_BAT, chemistry) \
+	_ATTR_R(_BAT, voltage) \
+	_ATTR_R(_BAT, group0_voltage) \
+	_ATTR_R(_BAT, group1_voltage) \
+	_ATTR_R(_BAT, group2_voltage) \
+	_ATTR_R(_BAT, group3_voltage) \
+	_ATTR_R(_BAT, current_now) \
+	_ATTR_R(_BAT, current_avg) \
+	_ATTR_R(_BAT, charging_max_current) \
+	_ATTR_R(_BAT, power_now) \
+	_ATTR_R(_BAT, power_avg) \
+	_ATTR_R(_BAT, remaining_percent) \
+	_ATTR_R(_BAT, remaining_percent_error) \
+	_ATTR_R(_BAT, remaining_charging_time) \
+	_ATTR_R(_BAT, remaining_running_time) \
+	_ATTR_R(_BAT, remaining_running_time_now) \
+	_ATTR_R(_BAT, remaining_capacity) \
+	_ATTR_R(_BAT, last_full_capacity) \
+	_ATTR_R(_BAT, design_voltage) \
+	_ATTR_R(_BAT, charging_max_voltage) \
+	_ATTR_R(_BAT, design_capacity) \
+	_ATTR_R(_BAT, cycle_count) \
+	_ATTR_R(_BAT, temperature) \
+	_ATTR_R(_BAT, serial) \
+	_ATTR_R(_BAT, manufacture_date) \
+	_ATTR_R(_BAT, first_use_date) \
+	_ATTR_R(_BAT, dump)
+
+/* Define several macros we will feed into FOREACH_BAT_ATTR: */
+
+#define DEFINE_BAT_ATTR_RW(_BAT,_NAME) \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {  \
+		.dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME,   \
+						store_battery_##_NAME), \
+		.bat = _BAT \
+	};
+
+#define DEFINE_BAT_ATTR_R(_BAT,_NAME) \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {    \
+		.dev_attr = __ATTR(_NAME, 0444, show_battery_##_NAME, NULL), \
+		.bat = _BAT \
+	};
+
+#define REF_BAT_ATTR(_BAT,_NAME) \
+	&dev_attr_##_NAME##_##_BAT.dev_attr.attr,
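+
+/* For example, DEFINE_BAT_ATTR_RW(0, force_discharge) defines a struct
+ * bat_device_attribute named dev_attr_force_discharge_0 binding
+ * show_battery_force_discharge()/store_battery_force_discharge() to
+ * battery 0, and REF_BAT_ATTR(0, force_discharge) references it in the
+ * attribute array built by PROVIDE_BAT_ATTRS(0). */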
+
+/* This provides all attributes for one battery: */
+
+#define PROVIDE_BAT_ATTRS(_BAT) \
+	FOREACH_BAT_ATTR(_BAT, DEFINE_BAT_ATTR_RW, DEFINE_BAT_ATTR_R) \
+	static struct attribute *tp_bat##_BAT##_attributes[] = { \
+		FOREACH_BAT_ATTR(_BAT, REF_BAT_ATTR, REF_BAT_ATTR) \
+		NULL \
+	}; \
+	static struct attribute_group tp_bat##_BAT##_attribute_group = { \
+		.name  = "BAT" #_BAT, \
+		.attrs = tp_bat##_BAT##_attributes \
+	};
+
+/* Finally generate the attributes: */
+
+PROVIDE_BAT_ATTRS(0)
+PROVIDE_BAT_ATTRS(1)
+
+/* List of attribute groups */
+
+static struct attribute_group *attr_groups[] = {
+	&tp_root_attribute_group,
+	&tp_bat0_attribute_group,
+	&tp_bat1_attribute_group,
+	NULL
+};
+
+
+/*********************************************************************
+ * Init and cleanup
+ */
+
+static struct attribute_group **next_attr_group; /* next to register */
+
+static int __init tp_init(void)
+{
+	int ret;
+	printk(KERN_INFO "tp_smapi " TP_VERSION " loading...\n");
+
+	ret = find_smapi_port();
+	if (ret < 0)
+		goto err;
+	else
+		smapi_port = ret;
+
+	if (!request_region(smapi_port, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       smapi_port);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!request_region(SMAPI_PORT2, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       SMAPI_PORT2);
+		ret = -ENXIO;
+		goto err_port1;
+	}
+
+	ret = platform_driver_register(&tp_driver);
+	if (ret)
+		goto err_port2;
+
+	pdev = platform_device_alloc("smapi", -1);
+	if (!pdev) {
+		ret = -ENOMEM;
+		goto err_driver;
+	}
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_device_free;
+
+	for (next_attr_group = attr_groups; *next_attr_group;
+	     ++next_attr_group) {
+		ret = sysfs_create_group(&pdev->dev.kobj, *next_attr_group);
+		if (ret)
+			goto err_attr;
+	}
+
+	printk(KERN_INFO "tp_smapi successfully loaded (smapi_port=0x%x).\n",
+	       smapi_port);
+	return 0;
+
+err_attr:
+	while (--next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_unregister(pdev); /* also drops the device reference */
+	goto err_driver;
+err_device_free:
+	platform_device_put(pdev);
+err_driver:
+	platform_driver_unregister(&tp_driver);
+err_port2:
+	release_region(SMAPI_PORT2, 1);
+err_port1:
+	release_region(smapi_port, 1);
+err:
+	printk(KERN_ERR "tp_smapi init failed (ret=%d)!\n", ret);
+	return ret;
+}
+
+static void __exit tp_exit(void)
+{
+	while (next_attr_group && --next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_unregister(pdev);
+	platform_driver_unregister(&tp_driver);
+	release_region(SMAPI_PORT2, 1);
+	if (smapi_port)
+		release_region(smapi_port, 1);
+
+	printk(KERN_INFO "tp_smapi unloaded.\n");
+}
+
+module_init(tp_init);
+module_exit(tp_exit);
diff -Nur linux-4.2/drivers/staging/Kconfig linux-4.2-pck/drivers/staging/Kconfig
--- linux-4.2/drivers/staging/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/staging/Kconfig	2015-09-08 00:48:22.225030705 -0300
@@ -112,4 +112,6 @@
 
 source "drivers/staging/wilc1000/Kconfig"
 
+source "drivers/staging/vhba/Kconfig"
+
 endif # STAGING
diff -Nur linux-4.2/drivers/staging/Makefile linux-4.2-pck/drivers/staging/Makefile
--- linux-4.2/drivers/staging/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/staging/Makefile	2015-09-08 00:48:22.225030705 -0300
@@ -25,6 +25,7 @@
 obj-$(CONFIG_FB_SM7XX)		+= sm7xxfb/
 obj-$(CONFIG_FB_SM750)		+= sm750fb/
 obj-$(CONFIG_FB_XGI)		+= xgifb/
+obj-$(CONFIG_VHBA)		+= vhba/
 obj-$(CONFIG_USB_EMXX)		+= emxx_udc/
 obj-$(CONFIG_FT1000)		+= ft1000/
 obj-$(CONFIG_SPEAKUP)		+= speakup/
diff -Nur linux-4.2/drivers/staging/android/ashmem.c linux-4.2-pck/drivers/staging/android/ashmem.c
--- linux-4.2/drivers/staging/android/ashmem.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/staging/android/ashmem.c	2015-09-08 11:20:32.140361257 -0300
@@ -387,7 +387,7 @@
 			name = asma->name;
 
 		/* ... and allocate the backing shmem file */
-		vmfile = shmem_file_setup(name, asma->size, vma->vm_flags);
+		vmfile = shmem_file_setup(name, asma->size, vma->vm_flags, 0);
 		if (unlikely(IS_ERR(vmfile))) {
 			ret = PTR_ERR(vmfile);
 			goto out;
diff -Nur linux-4.2/drivers/staging/vhba/Kconfig linux-4.2-pck/drivers/staging/vhba/Kconfig
--- linux-4.2/drivers/staging/vhba/Kconfig	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/drivers/staging/vhba/Kconfig	2015-09-08 00:48:22.225030705 -0300
@@ -0,0 +1,9 @@
+config VHBA
+	tristate "Virtual (SCSI) Host Bus Adapter"
+	depends on SCSI
+	---help---
+        This is the in-kernel part of CDEmu, a CD/DVD-ROM device
+        emulator.
+
+	This driver can also be built as a module. If so, the module
+	will be called vhba.
diff -Nur linux-4.2/drivers/staging/vhba/Makefile linux-4.2-pck/drivers/staging/vhba/Makefile
--- linux-4.2/drivers/staging/vhba/Makefile	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/drivers/staging/vhba/Makefile	2015-09-08 00:48:22.225030705 -0300
@@ -0,0 +1,4 @@
+VHBA_VERSION := 20140928
+
+obj-$(CONFIG_VHBA)		+= vhba.o
+ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror
diff -Nur linux-4.2/drivers/staging/vhba/vhba.c linux-4.2-pck/drivers/staging/vhba/vhba.c
--- linux-4.2/drivers/staging/vhba/vhba.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/drivers/staging/vhba/vhba.c	2015-09-08 00:48:22.228363880 -0300
@@ -0,0 +1,1071 @@
+/*
+ * vhba.c
+ *
+ * Copyright (C) 2007-2012 Chia-I Wu <b90201047 AT ntu DOT edu DOT tw>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <asm/uaccess.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+
+/* scatterlist.page_link and sg_page() were introduced in 2.6.24 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
+#define USE_SG_PAGE
+#include <linux/scatterlist.h>
+#endif
+
+MODULE_AUTHOR("Chia-I Wu");
+MODULE_VERSION(VHBA_VERSION);
+MODULE_DESCRIPTION("Virtual SCSI HBA");
+MODULE_LICENSE("GPL");
+
+#ifdef DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__, ## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+/* scmd_dbg was introduced in 3.15 */
+#ifndef scmd_dbg
+#define scmd_dbg(scmd, fmt, a...)       \
+    dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a)
+#endif
+
+#ifndef scmd_warn
+#define scmd_warn(scmd, fmt, a...)      \
+    dev_warn(&(scmd)->device->sdev_gendev, fmt, ##a)
+#endif
+
+#define VHBA_MAX_SECTORS_PER_IO 256
+#define VHBA_MAX_ID 32
+#define VHBA_CAN_QUEUE 32
+#define VHBA_INVALID_ID VHBA_MAX_ID
+
+#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL)
+#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL)
+
+
+/* SCSI macros were introduced in 2.6.23 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23)
+#define scsi_sg_count(cmd) ((cmd)->use_sg)
+#define scsi_sglist(cmd) ((cmd)->request_buffer)
+#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
+#define scsi_set_resid(cmd, to_read) {(cmd)->resid = (to_read);}
+#endif
+
+/* 1-argument form of k[un]map_atomic was introduced in 2.6.37-rc1;
+   2-argument form was deprecated in 3.4-rc1 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
+#define vhba_kmap_atomic kmap_atomic
+#define vhba_kunmap_atomic kunmap_atomic
+#else
+#define vhba_kmap_atomic(page) kmap_atomic(page, KM_USER0)
+#define vhba_kunmap_atomic(page) kunmap_atomic(page, KM_USER0)
+#endif
+
+
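+/* Per-command state tracked in vhba_command.status: a slot starts FREE,
+ * becomes PENDING when queued, passes through READING/WRITING while its
+ * request or response data is in flight, and is SENT once the request
+ * has been handed off; vhba_device_dequeue() uses these states to decide
+ * whether an abort can succeed. */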
+enum vhba_req_state {
+    VHBA_REQ_FREE,
+    VHBA_REQ_PENDING,
+    VHBA_REQ_READING,
+    VHBA_REQ_SENT,
+    VHBA_REQ_WRITING,
+};
+
+struct vhba_command {
+    struct scsi_cmnd *cmd;
+    int status;
+    struct list_head entry;
+};
+
+struct vhba_device {
+    uint id;
+    spinlock_t cmd_lock;
+    struct list_head cmd_list;
+    wait_queue_head_t cmd_wq;
+    atomic_t refcnt;
+};
+
+struct vhba_host {
+    struct Scsi_Host *shost;
+    spinlock_t cmd_lock;
+    int cmd_next;
+    struct vhba_command commands[VHBA_CAN_QUEUE];
+    spinlock_t dev_lock;
+    struct vhba_device *devices[VHBA_MAX_ID];
+    int num_devices;
+    DECLARE_BITMAP(chgmap, VHBA_MAX_ID);
+    int chgtype[VHBA_MAX_ID];
+    struct work_struct scan_devices;
+};
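+
+/* Hot-plug bookkeeping: vhba_add_device()/vhba_remove_device() mark the
+ * target ID in chgmap and increment/decrement chgtype[id]; the
+ * scan_devices work item then adds (chgtype > 0), removes (chgtype < 0),
+ * or removes-and-readds (chgtype == 0 with a device still present) the
+ * corresponding SCSI target. */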
+
+#define MAX_COMMAND_SIZE 16
+
+struct vhba_request {
+    __u32 tag;
+    __u32 lun;
+    __u8 cdb[MAX_COMMAND_SIZE];
+    __u8 cdb_len;
+    __u32 data_len;
+};
+
+struct vhba_response {
+    __u32 tag;
+    __u32 status;
+    __u32 data_len;
+};
+
+static struct vhba_command *vhba_alloc_command (void);
+static void vhba_free_command (struct vhba_command *vcmd);
+
+static struct platform_device vhba_platform_device;
+
+static struct vhba_device *vhba_device_alloc (void)
+{
+    struct vhba_device *vdev;
+
+    vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL);
+    if (!vdev) {
+        return NULL;
+    }
+
+    vdev->id = VHBA_INVALID_ID;
+    spin_lock_init(&vdev->cmd_lock);
+    INIT_LIST_HEAD(&vdev->cmd_list);
+    init_waitqueue_head(&vdev->cmd_wq);
+    atomic_set(&vdev->refcnt, 1);
+
+    return vdev;
+}
+
+static void vhba_device_put (struct vhba_device *vdev)
+{
+    if (atomic_dec_and_test(&vdev->refcnt)) {
+        kfree(vdev);
+    }
+}
+
+static struct vhba_device *vhba_device_get (struct vhba_device *vdev)
+{
+    atomic_inc(&vdev->refcnt);
+
+    return vdev;
+}
+
+static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd)
+{
+    struct vhba_command *vcmd;
+    unsigned long flags;
+
+    vcmd = vhba_alloc_command();
+    if (!vcmd) {
+        return SCSI_MLQUEUE_HOST_BUSY;
+    }
+
+    vcmd->cmd = cmd;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_add_tail(&vcmd->entry, &vdev->cmd_list);
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    wake_up_interruptible(&vdev->cmd_wq);
+
+    return 0;
+}
+
+static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd)
+{
+    struct vhba_command *vcmd;
+    int retval;
+    unsigned long flags;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->cmd == cmd) {
+            list_del_init(&vcmd->entry);
+            break;
+        }
+    }
+
+    /* command not found */
+    if (&vcmd->entry == &vdev->cmd_list) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        return SUCCESS;
+    }
+
+    while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        scmd_dbg(cmd, "wait for I/O before aborting\n");
+        schedule_timeout(1);
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+    }
+
+    retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS;
+
+    vhba_free_command(vcmd);
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return retval;
+}
+
+static inline void vhba_scan_devices_add (struct vhba_host *vhost, int id)
+{
+    struct scsi_device *sdev;
+
+    sdev = scsi_device_lookup(vhost->shost, 0, id, 0);
+    if (!sdev) {
+        scsi_add_device(vhost->shost, 0, id, 0);
+    } else {
+        dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device 0:%d:0!\n", id);
+        scsi_device_put(sdev);
+    }
+}
+
+static inline void vhba_scan_devices_remove (struct vhba_host *vhost, int id)
+{
+    struct scsi_device *sdev;
+
+    sdev = scsi_device_lookup(vhost->shost, 0, id, 0);
+    if (sdev) {
+        scsi_remove_device(sdev);
+        scsi_device_put(sdev);
+    } else {
+        dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device 0:%d:0!\n", id);
+    }
+}
+
+static void vhba_scan_devices (struct work_struct *work)
+{
+    struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices);
+    unsigned long flags;
+    int id, change, exists;
+
+    while (1) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+
+        id = find_first_bit(vhost->chgmap, vhost->shost->max_id);
+        if (id >= vhost->shost->max_id) {
+            spin_unlock_irqrestore(&vhost->dev_lock, flags);
+            break;
+        }
+        change = vhost->chgtype[id];
+        exists = vhost->devices[id] != NULL;
+
+        vhost->chgtype[id] = 0;
+        clear_bit(id, vhost->chgmap);
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+        if (change < 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to remove target 0:%d:0\n", id);
+            vhba_scan_devices_remove(vhost, id);
+        } else if (change > 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to add target 0:%d:0\n", id);
+            vhba_scan_devices_add(vhost, id);
+        } else {
+            /* quick sequence of add/remove or remove/add; we determine
+               which one it was by checking if device structure exists */
+            if (exists) {
+                /* remove followed by add: remove and (re)add */
+                dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target 0:%d:0\n", id);
+                vhba_scan_devices_remove(vhost, id);
+                vhba_scan_devices_add(vhost, id);
+            } else {
+                /* add followed by remove: no-op */
+                dev_dbg(&vhost->shost->shost_gendev, "no-op for target 0:%d:0\n", id);
+            }
+        }
+    }
+}
+
+static int vhba_add_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    int i;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    vhba_device_get(vdev);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    if (vhost->num_devices >= vhost->shost->max_id) {
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+        vhba_device_put(vdev);
+        return -EBUSY;
+    }
+
+    for (i = 0; i < vhost->shost->max_id; i++) {
+        if (vhost->devices[i] == NULL) {
+            vdev->id = i;
+            vhost->devices[i] = vdev;
+            vhost->num_devices++;
+            set_bit(vdev->id, vhost->chgmap);
+            vhost->chgtype[vdev->id]++;
+            break;
+        }
+    }
+    spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+
+static int vhba_remove_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    set_bit(vdev->id, vhost->chgmap);
+    vhost->chgtype[vdev->id]--;
+    vhost->devices[vdev->id] = NULL;
+    vhost->num_devices--;
+    vdev->id = VHBA_INVALID_ID;
+    spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    vhba_device_put(vdev);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+
+static struct vhba_device *vhba_lookup_device (int id)
+{
+    struct vhba_host *vhost;
+    struct vhba_device *vdev = NULL;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    if (likely(id < vhost->shost->max_id)) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+        vdev = vhost->devices[id];
+        if (vdev) {
+            vdev = vhba_device_get(vdev);
+        }
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+    }
+
+    return vdev;
+}
+
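+/* Grab a free slot from the host's preallocated command array, searching
+   round-robin from cmd_next; returns NULL when every slot is in use */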
+static struct vhba_command *vhba_alloc_command (void)
+{
+    struct vhba_host *vhost;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+    int i;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+
+    vcmd = vhost->commands + vhost->cmd_next++;
+    if (vcmd->status != VHBA_REQ_FREE) {
+        for (i = 0; i < vhost->shost->can_queue; i++) {
+            vcmd = vhost->commands + i;
+
+            if (vcmd->status == VHBA_REQ_FREE) {
+                vhost->cmd_next = i + 1;
+                break;
+            }
+        }
+
+        if (i == vhost->shost->can_queue) {
+            vcmd = NULL;
+        }
+    }
+
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+
+    vhost->cmd_next %= vhost->shost->can_queue;
+
+    spin_unlock_irqrestore(&vhost->cmd_lock, flags);
+
+    return vcmd;
+}
+
+static void vhba_free_command (struct vhba_command *vcmd)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+    vcmd->status = VHBA_REQ_FREE;
+    spin_unlock_irqrestore(&vhost->cmd_lock, flags);
+}
+
+static int vhba_queuecommand_lck (struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+{
+    struct vhba_device *vdev;
+    int retval;
+
+    scmd_dbg(cmd, "queue %lu\n", cmd->serial_number);
+
+    vdev = vhba_lookup_device(cmd->device->id);
+    if (!vdev) {
+        scmd_dbg(cmd, "no such device\n");
+
+        cmd->result = DID_NO_CONNECT << 16;
+        done(cmd);
+
+        return 0;
+    }
+
+    cmd->scsi_done = done;
+    retval = vhba_device_queue(vdev, cmd);
+
+    vhba_device_put(vdev);
+
+    return retval;
+}
+
+#ifdef DEF_SCSI_QCMD
+DEF_SCSI_QCMD(vhba_queuecommand)
+#else
+#define vhba_queuecommand vhba_queuecommand_lck
+#endif
+
+static int vhba_abort (struct scsi_cmnd *cmd)
+{
+    struct vhba_device *vdev;
+    int retval = SUCCESS;
+
+    scmd_warn(cmd, "abort %lu\n", cmd->serial_number);
+
+    vdev = vhba_lookup_device(cmd->device->id);
+    if (vdev) {
+        retval = vhba_device_dequeue(vdev, cmd);
+        vhba_device_put(vdev);
+    } else {
+        cmd->result = DID_NO_CONNECT << 16;
+    }
+
+    return retval;
+}
+
+static struct scsi_host_template vhba_template = {
+    .module = THIS_MODULE,
+    .name = "vhba",
+    .proc_name = "vhba",
+    .queuecommand = vhba_queuecommand,
+    .eh_abort_handler = vhba_abort,
+    .can_queue = VHBA_CAN_QUEUE,
+    .this_id = -1,
+    .cmd_per_lun = 1,
+    .max_sectors = VHBA_MAX_SECTORS_PER_IO,
+    .sg_tablesize = 256,
+};
+
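+/* Serialize a SCSI command into a struct vhba_request, followed by any
+   write-direction payload, and copy it to the userspace buffer */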
+static ssize_t do_request (struct scsi_cmnd *cmd, char __user *buf, size_t buf_len)
+{
+    struct vhba_request vreq;
+    ssize_t ret;
+
+    scmd_dbg(cmd, "request %lu, cdb 0x%x, bufflen %d, use_sg %d\n",
+        cmd->serial_number, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd));
+
+    ret = sizeof(vreq);
+    if (DATA_TO_DEVICE(cmd->sc_data_direction)) {
+        ret += scsi_bufflen(cmd);
+    }
+
+    if (ret > buf_len) {
+        scmd_warn(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret);
+        return -EIO;
+    }
+
+    vreq.tag = cmd->serial_number;
+    vreq.lun = cmd->device->lun;
+    memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE);
+    vreq.cdb_len = cmd->cmd_len;
+    vreq.data_len = scsi_bufflen(cmd);
+
+    if (copy_to_user(buf, &vreq, sizeof(vreq))) {
+        return -EFAULT;
+    }
+
+    if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) {
+        buf += sizeof(vreq);
+
+        if (scsi_sg_count(cmd)) {
+            unsigned char buf_stack[64];
+            unsigned char *kaddr, *uaddr, *kbuf;
+            struct scatterlist *sg = scsi_sglist(cmd);
+            int i;
+
+            uaddr = (unsigned char *) buf;
+
+            if (vreq.data_len > 64) {
+                kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+                if (!kbuf) {
+                    return -ENOMEM;
+                }
+            } else {
+                kbuf = buf_stack;
+            }
+
+            for (i = 0; i < scsi_sg_count(cmd); i++) {
+                size_t len = sg[i].length;
+
+#ifdef USE_SG_PAGE
+                kaddr = vhba_kmap_atomic(sg_page(&sg[i]));
+#else
+                kaddr = vhba_kmap_atomic(sg[i].page);
+#endif
+                memcpy(kbuf, kaddr + sg[i].offset, len);
+                vhba_kunmap_atomic(kaddr);
+
+                if (copy_to_user(uaddr, kbuf, len)) {
+                    if (kbuf != buf_stack) {
+                        kfree(kbuf);
+                    }
+                    return -EFAULT;
+                }
+                uaddr += len;
+            }
+
+            if (kbuf != buf_stack) {
+                kfree(kbuf);
+            }
+        } else {
+            if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) {
+                return -EFAULT;
+            }
+        }
+    }
+
+    return ret;
+}
+
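+/* Consume a struct vhba_response from userspace: on error status, copy the
+   sense data back; otherwise copy read-direction data into the scatter-gather
+   list and account for the residual */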
+static ssize_t do_response (struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res)
+{
+    ssize_t ret = 0;
+
+    scmd_dbg(cmd, "response %lu, status %x, data len %d, use_sg %d\n",
+         cmd->serial_number, res->status, res->data_len, scsi_sg_count(cmd));
+
+    if (res->status) {
+        unsigned char sense_stack[SCSI_SENSE_BUFFERSIZE];
+
+        if (res->data_len > SCSI_SENSE_BUFFERSIZE) {
+            scmd_warn(cmd, "truncate sense (%d < %d)\n", SCSI_SENSE_BUFFERSIZE, res->data_len);
+            res->data_len = SCSI_SENSE_BUFFERSIZE;
+        }
+
+        /* Copy via a temporary buffer on the stack in order to avoid problems
+           with PaX on grsecurity-enabled kernels */
+        if (copy_from_user(sense_stack, buf, res->data_len)) {
+            return -EFAULT;
+        }
+        memcpy(cmd->sense_buffer, sense_stack, res->data_len);
+
+        cmd->result = res->status;
+
+        ret += res->data_len;
+    } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) {
+        size_t to_read;
+
+        if (res->data_len > scsi_bufflen(cmd)) {
+            scmd_warn(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len);
+            res->data_len = scsi_bufflen(cmd);
+        }
+
+        to_read = res->data_len;
+
+        if (scsi_sg_count(cmd)) {
+            unsigned char buf_stack[64];
+            unsigned char *kaddr, *uaddr, *kbuf;
+            struct scatterlist *sg = scsi_sglist(cmd);
+            int i;
+
+            uaddr = (unsigned char *)buf;
+
+            if (res->data_len > 64) {
+                kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+                if (!kbuf) {
+                    return -ENOMEM;
+                }
+            } else {
+                kbuf = buf_stack;
+            }
+
+            for (i = 0; i < scsi_sg_count(cmd); i++) {
+                size_t len = (sg[i].length < to_read) ? sg[i].length : to_read;
+
+                if (copy_from_user(kbuf, uaddr, len)) {
+                    if (kbuf != buf_stack) {
+                        kfree(kbuf);
+                    }
+                    return -EFAULT;
+                }
+                uaddr += len;
+
+#ifdef USE_SG_PAGE
+                kaddr = vhba_kmap_atomic(sg_page(&sg[i]));
+#else
+                kaddr = vhba_kmap_atomic(sg[i].page);
+#endif
+                memcpy(kaddr + sg[i].offset, kbuf, len);
+                vhba_kunmap_atomic(kaddr);
+
+                to_read -= len;
+                if (to_read == 0) {
+                    break;
+                }
+            }
+
+            if (kbuf != buf_stack) {
+                kfree(kbuf);
+            }
+        } else {
+            if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) {
+                return -EFAULT;
+            }
+
+            to_read -= res->data_len;
+        }
+
+        scsi_set_resid(cmd, to_read);
+
+        ret += res->data_len - to_read;
+    }
+
+    return ret;
+}
+
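+/* Return the first command still in VHBA_REQ_PENDING state, or NULL;
+   the caller must hold vdev->cmd_lock */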
+static inline struct vhba_command *next_command (struct vhba_device *vdev)
+{
+    struct vhba_command *vcmd;
+
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->status == VHBA_REQ_PENDING) {
+            break;
+        }
+    }
+
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+
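+/* Find the queued command whose tag (serial number) matches the tag echoed
+   back by userspace; the caller must hold vdev->cmd_lock */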
+static inline struct vhba_command *match_command (struct vhba_device *vdev, u32 tag)
+{
+    struct vhba_command *vcmd;
+
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->cmd->serial_number == tag) {
+            break;
+        }
+    }
+
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+
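+/* Sleep until a pending command is queued or a signal arrives; called with
+   vdev->cmd_lock held, which is dropped while sleeping and re-acquired */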
+static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags)
+{
+    struct vhba_command *vcmd;
+    DEFINE_WAIT(wait);
+
+    while (!(vcmd = next_command(vdev))) {
+        if (signal_pending(current)) {
+            break;
+        }
+
+        prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE);
+
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        schedule();
+
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+    }
+
+    finish_wait(&vdev->cmd_wq, &wait);
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_READING;
+    }
+
+    return vcmd;
+}
+
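+/* read() on the control device hands the next pending SCSI request to
+   userspace; blocks unless the file was opened with O_NONBLOCK */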
+static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    ssize_t ret;
+    unsigned long flags;
+
+    vdev = file->private_data;
+
+    /* Get next command */
+    if (file->f_flags & O_NONBLOCK) {
+        /* Non-blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = next_command(vdev);
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -EWOULDBLOCK;
+        }
+    } else {
+        /* Blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = wait_command(vdev, flags);
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -ERESTARTSYS;
+        }
+    }
+
+    ret = do_request(vcmd->cmd, buf, buf_len);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+        vcmd->status = VHBA_REQ_SENT;
+        *offset += ret;
+    } else {
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+
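+/* write() on the control device completes a previously read request: the
+   buffer starts with a struct vhba_response, followed by sense or read data */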
+static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    struct vhba_response res;
+    ssize_t ret;
+    unsigned long flags;
+
+    if (buf_len < sizeof(res)) {
+        return -EIO;
+    }
+
+    if (copy_from_user(&res, buf, sizeof(res))) {
+        return -EFAULT;
+    }
+
+    vdev = file->private_data;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    vcmd = match_command(vdev, res.tag);
+    if (!vcmd || vcmd->status != VHBA_REQ_SENT) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        DPRINTK("not expecting response\n");
+        return -EIO;
+    }
+    vcmd->status = VHBA_REQ_WRITING;
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    ret = do_response(vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+        vcmd->cmd->scsi_done(vcmd->cmd);
+        ret += sizeof(res);
+
+        /* don't compete with vhba_device_dequeue */
+        if (!list_empty(&vcmd->entry)) {
+            list_del_init(&vcmd->entry);
+            vhba_free_command(vcmd);
+        }
+    } else {
+        vcmd->status = VHBA_REQ_SENT;
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+
+static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    struct vhba_device *vdev = file->private_data;
+    struct vhba_host *vhost;
+    struct scsi_device *sdev;
+
+    switch (cmd) {
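+        /* report the SCSI address (host, channel, id, lun) assigned to this
+           virtual device back to userspace */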
+        case 0xBEEF001: {
+            vhost = platform_get_drvdata(&vhba_platform_device);
+            sdev = scsi_device_lookup(vhost->shost, 0, vdev->id, 0);
+
+            if (sdev) {
+                int id[4] = {
+                    sdev->host->host_no,
+                    sdev->channel,
+                    sdev->id,
+                    sdev->lun
+                };
+
+                scsi_device_put(sdev);
+
+                if (copy_to_user((void __user *)arg, id, sizeof(id))) {
+                    return -EFAULT;
+                }
+
+                return 0;
+            } else {
+                return -ENODEV;
+            }
+        }
+    }
+
+    return -ENOTTY;
+}
+
+#ifdef CONFIG_COMPAT
+static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    unsigned long compat_arg = (unsigned long)compat_ptr(arg);
+    return vhba_ctl_ioctl(file, cmd, compat_arg);
+}
+#endif
+
+static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait)
+{
+    struct vhba_device *vdev = file->private_data;
+    unsigned int mask = 0;
+    unsigned long flags;
+
+    poll_wait(file, &vdev->cmd_wq, wait);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (next_command(vdev)) {
+        mask |= POLLIN | POLLRDNORM;
+    }
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return mask;
+}
+
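+/* Each open of the control device creates one virtual SCSI target; it is
+   removed again when the file is released */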
+static int vhba_ctl_open (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    int retval;
+
+    DPRINTK("open\n");
+
+    /* check if vhba is probed */
+    if (!platform_get_drvdata(&vhba_platform_device)) {
+        return -ENODEV;
+    }
+
+    vdev = vhba_device_alloc();
+    if (!vdev) {
+        return -ENOMEM;
+    }
+
+    if (!(retval = vhba_add_device(vdev))) {
+        file->private_data = vdev;
+    }
+
+    vhba_device_put(vdev);
+
+    return retval;
+}
+
+static int vhba_ctl_release (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+
+    DPRINTK("release\n");
+
+    vdev = file->private_data;
+
+    vhba_device_get(vdev);
+    vhba_remove_device(vdev);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING);
+
+        scmd_warn(vcmd->cmd, "device released with command %lu\n", vcmd->cmd->serial_number);
+        vcmd->cmd->result = DID_NO_CONNECT << 16;
+        vcmd->cmd->scsi_done(vcmd->cmd);
+
+        vhba_free_command(vcmd);
+    }
+    INIT_LIST_HEAD(&vdev->cmd_list);
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    vhba_device_put(vdev);
+
+    return 0;
+}
+
+static struct file_operations vhba_ctl_fops = {
+    .owner = THIS_MODULE,
+    .open = vhba_ctl_open,
+    .release = vhba_ctl_release,
+    .read = vhba_ctl_read,
+    .write = vhba_ctl_write,
+    .poll = vhba_ctl_poll,
+    .unlocked_ioctl = vhba_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+    .compat_ioctl = vhba_ctl_compat_ioctl,
+#endif
+};
+
+static struct miscdevice vhba_miscdev = {
+    .minor = MISC_DYNAMIC_MINOR,
+    .name = "vhba_ctl",
+    .fops = &vhba_ctl_fops,
+};
+
+static int vhba_probe (struct platform_device *pdev)
+{
+    struct Scsi_Host *shost;
+    struct vhba_host *vhost;
+    int i;
+
+    shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host));
+    if (!shost) {
+        return -ENOMEM;
+    }
+
+    shost->max_id = VHBA_MAX_ID;
+    /* we don't support lun > 0 */
+    shost->max_lun = 1;
+    shost->max_cmd_len = MAX_COMMAND_SIZE;
+
+    vhost = (struct vhba_host *)shost->hostdata;
+    memset(vhost, 0, sizeof(*vhost));
+
+    vhost->shost = shost;
+    vhost->num_devices = 0;
+    spin_lock_init(&vhost->dev_lock);
+    spin_lock_init(&vhost->cmd_lock);
+    INIT_WORK(&vhost->scan_devices, vhba_scan_devices);
+    vhost->cmd_next = 0;
+    for (i = 0; i < vhost->shost->can_queue; i++) {
+        vhost->commands[i].status = VHBA_REQ_FREE;
+    }
+
+    platform_set_drvdata(pdev, vhost);
+
+    if (scsi_add_host(shost, &pdev->dev)) {
+        scsi_host_put(shost);
+        return -ENOMEM;
+    }
+
+    return 0;
+}
+
+static int vhba_remove (struct platform_device *pdev)
+{
+    struct vhba_host *vhost;
+    struct Scsi_Host *shost;
+
+    vhost = platform_get_drvdata(pdev);
+    shost = vhost->shost;
+
+    scsi_remove_host(shost);
+    scsi_host_put(shost);
+
+    return 0;
+}
+
+static void vhba_release (struct device *dev)
+{
+}
+
+static struct platform_device vhba_platform_device = {
+    .name = "vhba",
+    .id = -1,
+    .dev = {
+        .release = vhba_release,
+    },
+};
+
+static struct platform_driver vhba_platform_driver = {
+    .driver = {
+        .owner = THIS_MODULE,
+        .name = "vhba",
+    },
+    .probe = vhba_probe,
+    .remove = vhba_remove,
+};
+
+static int __init vhba_init (void)
+{
+    int ret;
+
+    ret = platform_device_register(&vhba_platform_device);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = platform_driver_register(&vhba_platform_driver);
+    if (ret < 0) {
+        platform_device_unregister(&vhba_platform_device);
+        return ret;
+    }
+
+    ret = misc_register(&vhba_miscdev);
+    if (ret < 0) {
+        platform_driver_unregister(&vhba_platform_driver);
+        platform_device_unregister(&vhba_platform_device);
+        return ret;
+    }
+
+    return 0;
+}
+
+static void __exit vhba_exit(void)
+{
+    misc_deregister(&vhba_miscdev);
+    platform_driver_unregister(&vhba_platform_driver);
+    platform_device_unregister(&vhba_platform_device);
+}
+
+module_init(vhba_init);
+module_exit(vhba_exit);
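+
+/*
+ * Illustrative userspace sketch (not part of this driver): a surrogate
+ * process opens the control device (which creates one SCSI target), read()s
+ * a struct vhba_request (CDB plus any write-direction payload), services it,
+ * and write()s a struct vhba_response followed by sense or read-direction
+ * data, echoing back the request tag.  BUFSIZE and service_cdb() are
+ * placeholders; buffer sizing and error handling are omitted.
+ *
+ *     char buf[BUFSIZE];
+ *     struct vhba_request *req = (struct vhba_request *) buf;
+ *     struct vhba_response res;
+ *     int fd = open("/dev/vhba_ctl", O_RDWR);
+ *
+ *     for (;;) {
+ *         read(fd, buf, sizeof(buf));
+ *         service_cdb(req->cdb, req->cdb_len, req->lun);
+ *         res.tag = req->tag;
+ *         res.status = 0;
+ *         res.data_len = 0;
+ *         write(fd, &res, sizeof(res));
+ *     }
+ */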
+
diff -Nur linux-4.2/drivers/tty/Kconfig linux-4.2-pck/drivers/tty/Kconfig
--- linux-4.2/drivers/tty/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/tty/Kconfig	2015-09-08 00:48:22.228363880 -0300
@@ -75,6 +75,124 @@
 	def_bool y
 	depends on VT_CONSOLE && PM_SLEEP
 
+menuconfig VT_CKO
+	bool "Colored kernel message output"
+	depends on VT_CONSOLE
+	---help---
+	  This option enables kernel messages to be emitted in
+	  colors other than the default.
+
+	  The color value you need to enter is composed (OR-ed)
+	  of a foreground and a background color.
+
+	  Foreground:
+	  0x00 = black,   0x08 = dark gray,
+	  0x01 = red,     0x09 = light red,
+	  0x02 = green,   0x0A = light green,
+	  0x03 = brown,   0x0B = yellow,
+	  0x04 = blue,    0x0C = light blue,
+	  0x05 = magenta, 0x0D = light magenta,
+	  0x06 = cyan,    0x0E = light cyan,
+	  0x07 = gray,    0x0F = white,
+
+	  (Foreground colors 0x08 to 0x0F do not work when a VGA
+	  console font with 512 glyphs is used.)
+
+	  Background:
+	  0x00 = black,   0x40 = blue,
+	  0x10 = red,     0x50 = magenta,
+	  0x20 = green,   0x60 = cyan,
+	  0x30 = brown,   0x70 = gray,
+
+	  For example, 0x1F would yield white on red.
+
+	  If unsure, say N.
+
+config VT_PRINTK_EMERG_COLOR
+	hex "Emergency messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel emergency messages
+	  will be printed to the console.
+
+config VT_PRINTK_ALERT_COLOR
+	hex "Alert messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel alert messages
+	  will be printed to the console.
+
+config VT_PRINTK_CRIT_COLOR
+	hex "Critical messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel critical messages
+	  will be printed to the console.
+
+config VT_PRINTK_ERR_COLOR
+	hex "Error messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel error messages
+	  will be printed to the console.
+
+config VT_PRINTK_WARNING_COLOR
+	hex "Warning messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel warning messages
+	  will be printed to the console.
+
+config VT_PRINTK_NOTICE_COLOR
+	hex "Notice messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel notice messages
+	  will be printed to the console.
+
+config VT_PRINTK_INFO_COLOR
+	hex "Information messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel information messages
+	  will be printed to the console.
+
+config VT_PRINTK_DEBUG_COLOR
+	hex "Debug messages color"
+	range 0x00 0xFF
+	depends on VT_CKO
+	default 0x07
+	---help---
+	  This option defines the color in which kernel debug messages
+	  will be printed to the console.
+
+config NR_TTY_DEVICES
+	int "Maximum tty device number"
+	depends on VT
+	range 12 63
+	default 63
+	---help---
+	  This option is used to change the number of tty devices in /dev.
+	  The default value is 63. The lowest number you can set is 12;
+	  the upper limit is 63 so that we don't overrun the serial
+	  consoles.
+
+	  If unsure, say 63.
+
 config HW_CONSOLE
 	bool
 	depends on VT && !UML
diff -Nur linux-4.2/drivers/tty/serial/8250/8250_core.c linux-4.2-pck/drivers/tty/serial/8250/8250_core.c
--- linux-4.2/drivers/tty/serial/8250/8250_core.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/tty/serial/8250/8250_core.c	2015-09-08 00:48:22.228363880 -0300
@@ -3339,7 +3339,7 @@
  *	The console_lock must be held when we get here.
  */
 static void serial8250_console_write(struct uart_8250_port *up, const char *s,
-				     unsigned int count)
+				     unsigned int count, unsigned int loglevel)
 {
 	struct uart_port *port = &up->port;
 	unsigned long flags;
@@ -3413,11 +3413,11 @@
 }
 
 static void univ8250_console_write(struct console *co, const char *s,
-				   unsigned int count)
+				   unsigned int count, unsigned int loglevel)
 {
 	struct uart_8250_port *up = &serial8250_ports[co->index];
 
-	serial8250_console_write(up, s, count);
+	serial8250_console_write(up, s, count, loglevel);
 }
 
 static unsigned int probe_baud(struct uart_port *port)
diff -Nur linux-4.2/drivers/tty/serial/8250/8250_early.c linux-4.2-pck/drivers/tty/serial/8250/8250_early.c
--- linux-4.2/drivers/tty/serial/8250/8250_early.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/tty/serial/8250/8250_early.c	2015-09-08 00:48:22.228363880 -0300
@@ -90,7 +90,7 @@
 }
 
 static void __init early_serial8250_write(struct console *console,
-					const char *s, unsigned int count)
+          const char *s, unsigned int count, unsigned int loglevel)
 {
 	struct earlycon_device *device = console->data;
 	struct uart_port *port = &device->port;
diff -Nur linux-4.2/drivers/tty/vt/vt.c linux-4.2-pck/drivers/tty/vt/vt.c
--- linux-4.2/drivers/tty/vt/vt.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/drivers/tty/vt/vt.c	2015-09-08 00:48:22.228363880 -0300
@@ -71,6 +71,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
@@ -2531,16 +2532,44 @@
 		return kmsg_con;
 }
 
+#ifdef CONFIG_VT_CKO
+static unsigned int printk_color[8] __read_mostly = {
+	CONFIG_VT_PRINTK_EMERG_COLOR,	/* KERN_EMERG */
+	CONFIG_VT_PRINTK_ALERT_COLOR,	/* KERN_ALERT */
+	CONFIG_VT_PRINTK_CRIT_COLOR,	/* KERN_CRIT */
+	CONFIG_VT_PRINTK_ERR_COLOR,	/* KERN_ERR */
+	CONFIG_VT_PRINTK_WARNING_COLOR,	/* KERN_WARNING */
+	CONFIG_VT_PRINTK_NOTICE_COLOR,	/* KERN_NOTICE */
+	CONFIG_VT_PRINTK_INFO_COLOR,	/* KERN_INFO */
+	CONFIG_VT_PRINTK_DEBUG_COLOR,	/* KERN_DEBUG */
+};
+module_param_array(printk_color, uint, NULL, S_IRUGO | S_IWUSR);
+
+static inline void vc_set_color(struct vc_data *vc, unsigned char color)
+{
+	vc->vc_color = color_table[color & 0xF] |
+	               (color_table[(color >> 4) & 0x7] << 4) |
+	               (color & 0x80);
+	update_attr(vc);
+}
+#else
+static unsigned int printk_color[8];
+static inline void vc_set_color(const struct vc_data *vc, unsigned char c)
+{
+}
+#endif
+
 /*
  *	Console on virtual terminal
  *
  * The console must be locked when we get here.
  */
 
-static void vt_console_print(struct console *co, const char *b, unsigned count)
+static void vt_console_print(struct console *co, const char *b, unsigned count,
+			     unsigned int loglevel)
 {
 	struct vc_data *vc = vc_cons[fg_console].d;
-	unsigned char c;
+	unsigned char current_color, c;
 	static DEFINE_SPINLOCK(printing_lock);
 	const ushort *start;
 	ushort cnt = 0;
@@ -2576,11 +2605,20 @@
 
 	start = (ushort *)vc->vc_pos;
 
+	/*
+	 * We always get a valid loglevel - <8> and "no level" are transformed
+	 * to <4> in a typical kernel.
+	 */
+	current_color = printk_color[loglevel];
+	vc_set_color(vc, current_color);
+
 	/* Contrived structure to try to emulate original need_wrap behaviour
 	 * Problems caused when we have need_wrap set on '\n' character */
 	while (count--) {
 		c = *b++;
 		if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) {
+			vc_set_color(vc, vc->vc_def_color);
 			if (cnt > 0) {
 				if (CON_IS_VISIBLE(vc))
 					vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
@@ -2593,6 +2631,7 @@
 				bs(vc);
 				start = (ushort *)vc->vc_pos;
 				myx = vc->vc_x;
+				vc_set_color(vc, current_color);
 				continue;
 			}
 			if (c != 13)
@@ -2600,6 +2639,7 @@
 			cr(vc);
 			start = (ushort *)vc->vc_pos;
 			myx = vc->vc_x;
+			vc_set_color(vc, current_color);
 			if (c == 10 || c == 13)
 				continue;
 		}
@@ -2622,6 +2662,7 @@
 			vc->vc_need_wrap = 1;
 		}
 	}
+	vc_set_color(vc, vc->vc_def_color);
 	set_cursor(vc);
 	notify_update(vc);
 
diff -Nur linux-4.2/fs/drop_caches.c linux-4.2-pck/fs/drop_caches.c
--- linux-4.2/fs/drop_caches.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/fs/drop_caches.c	2015-09-08 11:20:32.183692475 -0300
@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include <linux/export.h>
 #include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
@@ -37,6 +38,12 @@
 	iput(toput_inode);
 }
 
+/* For TuxOnIce */
+void drop_pagecache(void)
+{
+	iterate_supers(drop_pagecache_sb, NULL);
+}
+
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
diff -Nur linux-4.2/fs/exec.c linux-4.2-pck/fs/exec.c
--- linux-4.2/fs/exec.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/fs/exec.c	2015-09-08 00:51:03.460668141 -0300
@@ -19,7 +19,7 @@
  * current->executable is only used by the procfs.  This allows a dispatch
  * table to check for several different types  of binary formats.  We keep
  * trying until we recognize the file or we run out of supported binary
- * formats. 
+ * formats.
  */
 
 #include <linux/slab.h>
@@ -56,6 +56,9 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/compat.h>
+#include <linux/ksm.h>
+
+#include <trace/events/fs.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -784,8 +787,10 @@
 	if (err)
 		goto exit;
 
-	if (name->name[0] != '\0')
+	if (name->name[0] != '\0') {
 		fsnotify_open(file);
+		trace_open_exec(name->name);
+	}
 
 out:
 	return file;
@@ -1156,6 +1161,7 @@
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
 	current->self_exec_id++;
+
 	flush_signal_handlers(current, 0);
 	do_close_on_exec(current->files);
 }
diff -Nur linux-4.2/fs/open.c linux-4.2-pck/fs/open.c
--- linux-4.2/fs/open.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/fs/open.c	2015-09-08 00:48:22.228363880 -0300
@@ -34,6 +34,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs.h>
+
 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	struct file *filp)
 {
@@ -1029,6 +1032,7 @@
 		} else {
 			fsnotify_open(f);
 			fd_install(fd, f);
+			trace_do_sys_open(tmp->name, flags, mode);
 		}
 	}
 	putname(tmp);
diff -Nur linux-4.2/fs/proc/meminfo.c linux-4.2-pck/fs/proc/meminfo.c
--- linux-4.2/fs/proc/meminfo.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/fs/proc/meminfo.c	2015-09-08 00:51:03.460668141 -0300
@@ -124,6 +124,9 @@
 		"SUnreclaim:     %8lu kB\n"
 		"KernelStack:    %8lu kB\n"
 		"PageTables:     %8lu kB\n"
+#ifdef CONFIG_UKSM
+		"KsmZeroPages:   %8lu kB\n"
+#endif
 #ifdef CONFIG_QUICKLIST
 		"Quicklists:     %8lu kB\n"
 #endif
@@ -182,6 +185,9 @@
 		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
 		K(global_page_state(NR_PAGETABLE)),
+#ifdef CONFIG_UKSM
+		K(global_page_state(NR_UKSM_ZERO_PAGES)),
+#endif
 #ifdef CONFIG_QUICKLIST
 		K(quicklist_total_size()),
 #endif
diff -Nur linux-4.2/fs/super.c linux-4.2-pck/fs/super.c
--- linux-4.2/fs/super.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/fs/super.c	2015-09-08 11:20:32.190358817 -0300
@@ -36,7 +36,7 @@
 #include "internal.h"
 
 
-static LIST_HEAD(super_blocks);
+LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
 
 static char *sb_writers_name[SB_FREEZE_LEVELS] = {
diff -Nur linux-4.2/include/acpi/acconfig.h linux-4.2-pck/include/acpi/acconfig.h
--- linux-4.2/include/acpi/acconfig.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/acpi/acconfig.h	2015-09-08 00:48:22.228363880 -0300
@@ -112,9 +112,19 @@
  *
  *****************************************************************************/
 
-/* Version of ACPI supported */
-
+/*
+ * Version of ACPI supported. This is a sad story. Windows reports a _REV of
+ * 2 regardless of the spec version implemented. Some vendors are using _REV
+ * as a way to distinguish between Windows and Linux, and are breaking systems
+ * in the process. We can't guarantee that they'll call _OSI before checking
+ * _REV, so hardcode this to 2 on x86 systems right now and leave it at the
+ * appropriate spec value for everybody else.
+ */
+#ifdef CONFIG_X86
+#define ACPI_CA_SUPPORT_LEVEL           2
+#else
 #define ACPI_CA_SUPPORT_LEVEL           5
+#endif
 
 /* Maximum count for a semaphore object */
 
diff -Nur linux-4.2/include/asm-generic/pgtable.h linux-4.2-pck/include/asm-generic/pgtable.h
--- linux-4.2/include/asm-generic/pgtable.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/asm-generic/pgtable.h	2015-09-08 00:51:03.460668141 -0300
@@ -555,12 +555,25 @@
 			unsigned long size);
 #endif
 
+#ifdef CONFIG_UKSM
+static inline int is_uksm_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long uksm_zero_pfn;
+	return pfn == uksm_zero_pfn;
+}
+#else
+static inline int is_uksm_zero_pfn(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
 {
 	extern unsigned long zero_pfn;
 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT) || is_uksm_zero_pfn(pfn);
 }
 
 #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
@@ -569,7 +582,7 @@
 static inline int is_zero_pfn(unsigned long pfn)
 {
 	extern unsigned long zero_pfn;
-	return pfn == zero_pfn;
+	return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn));
 }
 
 static inline unsigned long my_zero_pfn(unsigned long addr)
diff -Nur linux-4.2/include/linux/bio.h linux-4.2-pck/include/linux/bio.h
--- linux-4.2/include/linux/bio.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/bio.h	2015-09-08 11:20:32.227023691 -0300
@@ -32,6 +32,8 @@
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
 #include <linux/blk_types.h>
 
+extern int trap_non_toi_io;
+
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
diff -Nur linux-4.2/include/linux/blk_types.h linux-4.2-pck/include/linux/blk_types.h
--- linux-4.2/include/linux/blk_types.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/blk_types.h	2015-09-08 11:20:32.253689056 -0300
@@ -121,13 +121,14 @@
 #define BIO_SNAP_STABLE	7	/* bio data must be snapshotted during write */
 #define BIO_CHAIN	8	/* chained bio, ->bi_remaining in effect */
 #define BIO_REFFED	9	/* bio has elevated ->bi_cnt */
+#define BIO_TOI		10	/* bio was submitted by TuxOnIce */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
  * BIO_POOL_IDX()
  */
-#define BIO_RESET_BITS	13
-#define BIO_OWNS_VEC	13	/* bio_free() should free bvec */
+#define BIO_RESET_BITS	14
+#define BIO_OWNS_VEC	14	/* bio_free() should free bvec */
 
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
diff -Nur linux-4.2/include/linux/cgroup_subsys.h linux-4.2-pck/include/linux/cgroup_subsys.h
--- linux-4.2/include/linux/cgroup_subsys.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/cgroup_subsys.h	2015-09-08 00:48:22.228363880 -0300
@@ -35,6 +35,10 @@
 SUBSYS(net_cls)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_BFQIO)
+SUBSYS(bfqio)
+#endif
+
 #if IS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf_event)
 #endif
diff -Nur linux-4.2/include/linux/console.h linux-4.2-pck/include/linux/console.h
--- linux-4.2/include/linux/console.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/console.h	2015-09-08 00:48:22.228363880 -0300
@@ -119,7 +119,7 @@
 
 struct console {
 	char	name[16];
-	void	(*write)(struct console *, const char *, unsigned);
+	void	(*write)(struct console *, const char *, unsigned, unsigned int);
 	int	(*read)(struct console *, char *, unsigned);
 	struct tty_driver *(*device)(struct console *, int *);
 	void	(*unblank)(void);
diff -Nur linux-4.2/include/linux/fs.h linux-4.2-pck/include/linux/fs.h
--- linux-4.2/include/linux/fs.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/fs.h	2015-09-08 11:20:32.257022227 -0300
@@ -1258,6 +1258,8 @@
 #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
 #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
+extern struct list_head super_blocks;
+
 /* sb->s_iflags */
 #define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
 
@@ -1741,6 +1743,8 @@
 #else
 #define S_DAX		0	/* Make all the DAX code disappear */
 #endif
+#define S_ATOMIC_COPY	16384	/* Pages mapped with this inode need to be
+				   atomically copied (gem) */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2269,6 +2273,13 @@
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
+extern int fsync_super(struct super_block *);
+extern int fsync_no_super(struct block_device *);
+#define FS_FREEZER_FUSE 1
+#define FS_FREEZER_NORMAL 2
+#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
+void freeze_filesystems(int which);
+void thaw_filesystems(int which);
 
 extern struct super_block *blockdev_superblock;
 
diff -Nur linux-4.2/include/linux/fs_uuid.h linux-4.2-pck/include/linux/fs_uuid.h
--- linux-4.2/include/linux/fs_uuid.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/linux/fs_uuid.h	2015-09-08 11:20:32.257022227 -0300
@@ -0,0 +1,19 @@
+#include <linux/device.h>
+
+struct hd_struct;
+struct block_device;
+
+struct fs_info {
+	char uuid[16];
+	dev_t dev_t;
+	char *last_mount;
+	int last_mount_size;
+};
+
+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek);
+dev_t blk_lookup_fs_info(struct fs_info *seek);
+struct fs_info *fs_info_from_block_dev(struct block_device *bdev);
+void free_fs_info(struct fs_info *fs_info);
+int bdev_matches_key(struct block_device *bdev, const char *key);
+struct block_device *next_bdev_of_type(struct block_device *last,
+	const char *key);
diff -Nur linux-4.2/include/linux/gfp.h linux-4.2-pck/include/linux/gfp.h
--- linux-4.2/include/linux/gfp.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/gfp.h	2015-09-08 11:20:32.260355398 -0300
@@ -35,6 +35,7 @@
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
 #define ___GFP_WRITE		0x1000000u
+#define ___GFP_TOI_NOTRACK      0x2000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -94,6 +95,7 @@
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
+#define __GFP_TOI_NOTRACK	((__force gfp_t)___GFP_TOI_NOTRACK)	/* Allocator wants page untracked by TOI */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
@@ -101,7 +103,7 @@
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 26	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff -Nur linux-4.2/include/linux/ksm.h linux-4.2-pck/include/linux/ksm.h
--- linux-4.2/include/linux/ksm.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/ksm.h	2015-09-08 00:51:03.460668141 -0300
@@ -19,21 +19,6 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
-int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
-
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
-	return 0;
-}
-
-static inline void ksm_exit(struct mm_struct *mm)
-{
-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
-}
 
 static inline struct stable_node *page_stable_node(struct page *page)
 {
@@ -64,6 +49,33 @@
 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
+#ifdef CONFIG_KSM_LEGACY
+int __ksm_enter(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm);
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+		return __ksm_enter(mm);
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm);
+}
+
+#elif defined(CONFIG_UKSM)
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_KSM_LEGACY || CONFIG_UKSM */
+
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
@@ -106,4 +118,6 @@
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
 
+#include <linux/uksm.h>
+
 #endif /* __LINUX_KSM_H */
diff -Nur linux-4.2/include/linux/libata.h linux-4.2-pck/include/linux/libata.h
--- linux-4.2/include/linux/libata.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/libata.h	2015-09-08 00:48:22.228363880 -0300
@@ -516,6 +516,7 @@
 enum ata_lpm_policy {
 	ATA_LPM_UNKNOWN,
 	ATA_LPM_MAX_POWER,
+	ATA_LPM_FIRMWARE_DEFAULTS,
 	ATA_LPM_MED_POWER,
 	ATA_LPM_MIN_POWER,
 };
@@ -728,6 +729,8 @@
 	int			spdn_cnt;
 	/* ering is CLEAR_END, read comment above CLEAR_END */
 	struct ata_ering	ering;
+	/* Initial DIPM configuration */
+	bool			init_dipm;
 };
 
 /* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are
@@ -798,6 +801,7 @@
 
 	struct ata_device	device[ATA_MAX_DEVICES];
 
+	u8			init_lpm; /* initial lpm configuration */
 	unsigned long		last_lpm_change; /* when last LPM change happened */
 };
 #define ATA_LINK_CLEAR_BEGIN		offsetof(struct ata_link, active_tag)
diff -Nur linux-4.2/include/linux/mm.h linux-4.2-pck/include/linux/mm.h
--- linux-4.2/include/linux/mm.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/mm.h	2015-09-08 11:20:32.273688080 -0300
@@ -2137,6 +2137,7 @@
 					void __user *, size_t *, loff_t *);
 #endif
 
+void drop_pagecache(void);
 void drop_slab(void);
 void drop_slab_node(int nid);
 
diff -Nur linux-4.2/include/linux/mm_types.h linux-4.2-pck/include/linux/mm_types.h
--- linux-4.2/include/linux/mm_types.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/mm_types.h	2015-09-08 00:51:03.460668141 -0300
@@ -322,6 +322,9 @@
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
+#ifdef CONFIG_UKSM
+	struct vma_slot *uksm_vma_slot;
+#endif
 };
 
 struct core_thread {
diff -Nur linux-4.2/include/linux/mmzone.h linux-4.2-pck/include/linux/mmzone.h
--- linux-4.2/include/linux/mmzone.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/mmzone.h	2015-09-08 00:51:03.460668141 -0300
@@ -157,6 +157,9 @@
 	WORKINGSET_NODERECLAIM,
 	NR_ANON_TRANSPARENT_HUGEPAGES,
 	NR_FREE_CMA_PAGES,
+#ifdef CONFIG_UKSM
+	NR_UKSM_ZERO_PAGES,
+#endif
 	NR_VM_ZONE_STAT_ITEMS };
 
 /*
@@ -872,7 +875,7 @@
 }
 
 /**
- * is_highmem - helper function to quickly check if a struct zone is a 
+ * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
  *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
  * @zone - pointer to struct zone variable
diff -Nur linux-4.2/include/linux/page-flags.h linux-4.2-pck/include/linux/page-flags.h
--- linux-4.2/include/linux/page-flags.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/page-flags.h	2015-09-08 11:20:32.280354422 -0300
@@ -109,6 +109,12 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+	PG_toi_untracked,	/* Don't track dirtiness of this page - assume always dirty */
+	PG_toi_ro,		/* Page was made RO by TOI */
+	PG_toi_cbw,		/* Copy the page before it is written to */
+	PG_toi_dirty,		/* Page has been modified */
+#endif
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -288,6 +294,17 @@
 PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+PAGEFLAG(TOI_RO, toi_ro)
+PAGEFLAG(TOI_Dirty, toi_dirty)
+PAGEFLAG(TOI_Untracked, toi_untracked)
+PAGEFLAG(TOI_CBW, toi_cbw)
+#else
+PAGEFLAG_FALSE(TOI_RO)
+PAGEFLAG_FALSE(TOI_Dirty)
+PAGEFLAG_FALSE(TOI_Untracked)
+PAGEFLAG_FALSE(TOI_CBW)
+#endif
 
 /*
  * On an anonymous page mapped into a user virtual memory area,
@@ -642,8 +659,12 @@
  * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
  * alloc-free cycle to prevent from reusing the page.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP	\
-	(((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+#ifdef CONFIG_TOI_INCREMENTAL
+#define PAGE_FLAGS_CHECK_AT_PREP	(((1 << NR_PAGEFLAGS) - 1) & \
+	~((1 << PG_toi_dirty) | (1 << PG_toi_ro) | __PG_HWPOISON))
+#else
+#define PAGE_FLAGS_CHECK_AT_PREP	(((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+#endif
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1 << PG_private | 1 << PG_private_2)
diff -Nur linux-4.2/include/linux/shmem_fs.h linux-4.2-pck/include/linux/shmem_fs.h
--- linux-4.2/include/linux/shmem_fs.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/shmem_fs.h	2015-09-08 11:20:32.283687592 -0300
@@ -48,9 +48,10 @@
 extern int shmem_init(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 extern struct file *shmem_file_setup(const char *name,
-					loff_t size, unsigned long flags);
+					loff_t size, unsigned long flags,
+					int atomic_copy);
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
-					    unsigned long flags);
+					    unsigned long flags, int atomic_copy);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern bool shmem_mapping(struct address_space *mapping);
diff -Nur linux-4.2/include/linux/sradix-tree.h linux-4.2-pck/include/linux/sradix-tree.h
--- linux-4.2/include/linux/sradix-tree.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/linux/sradix-tree.h	2015-09-08 00:51:03.460668141 -0300
@@ -0,0 +1,77 @@
+#ifndef _LINUX_SRADIX_TREE_H
+#define _LINUX_SRADIX_TREE_H
+
+
+#define INIT_SRADIX_TREE(root, mask)					\
+do {									\
+	(root)->height = 0;						\
+	(root)->gfp_mask = (mask);					\
+	(root)->rnode = NULL;						\
+} while (0)
+
+#define ULONG_BITS	(sizeof(unsigned long) * 8)
+#define SRADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
+//#define SRADIX_TREE_MAP_SHIFT	6
+//#define SRADIX_TREE_MAP_SIZE	(1UL << SRADIX_TREE_MAP_SHIFT)
+//#define SRADIX_TREE_MAP_MASK	(SRADIX_TREE_MAP_SIZE-1)
+
+struct sradix_tree_node {
+	unsigned int	height;		/* Height from the bottom */
+	unsigned int	count;		
+	unsigned int	fulls;		/* Number of full sublevel trees */ 
+	struct sradix_tree_node *parent;
+	void *stores[0];
+};
+
+/* A simple radix tree implementation */
+struct sradix_tree_root {
+        unsigned int            height;
+        struct sradix_tree_node *rnode;
+
+	/* Node last found to have empty stores available in its sublevels */
+        struct sradix_tree_node *enter_node;
+	unsigned int shift;
+	unsigned int stores_size;
+	unsigned int mask;
+	unsigned long min;	/* The first hole index */
+	unsigned long num;
+	//unsigned long *height_to_maxindex;
+
+	/* How the node is allocated and freed. */
+	struct sradix_tree_node *(*alloc)(void); 
+	void (*free)(struct sradix_tree_node *node);
+
+	/* When a new node is added and removed */
+	void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child);
+	void (*assign)(struct sradix_tree_node *node, unsigned index, void *item);
+	void (*rm)(struct sradix_tree_node *node, unsigned offset);
+};
+
+struct sradix_tree_path {
+	struct sradix_tree_node *node;
+	int offset;
+};
+
+static inline 
+void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift)
+{
+	root->height = 0;
+	root->rnode = NULL;
+	root->shift = shift;
+	root->stores_size = 1UL << shift;
+	root->mask = root->stores_size - 1;
+}
+
+
+extern void *sradix_tree_next(struct sradix_tree_root *root,
+		       struct sradix_tree_node *node, unsigned long index,
+		       int (*iter)(void *, unsigned long));
+
+extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num);
+
+extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, 
+			struct sradix_tree_node *node, unsigned long index);
+
+extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index);
+
+#endif /* _LINUX_SRADIX_TREE_H */
diff -Nur linux-4.2/include/linux/suspend.h linux-4.2-pck/include/linux/suspend.h
--- linux-4.2/include/linux/suspend.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/suspend.h	2015-09-08 11:20:32.287020763 -0300
@@ -452,6 +452,74 @@
 #define pm_print_times_enabled	(false)
 #endif
 
+enum {
+	TOI_CAN_HIBERNATE,
+	TOI_CAN_RESUME,
+	TOI_RESUME_DEVICE_OK,
+	TOI_NORESUME_SPECIFIED,
+	TOI_SANITY_CHECK_PROMPT,
+	TOI_CONTINUE_REQ,
+	TOI_RESUMED_BEFORE,
+	TOI_BOOT_TIME,
+	TOI_NOW_RESUMING,
+	TOI_IGNORE_LOGLEVEL,
+	TOI_TRYING_TO_RESUME,
+	TOI_LOADING_ALT_IMAGE,
+	TOI_STOP_RESUME,
+	TOI_IO_STOPPED,
+	TOI_NOTIFIERS_PREPARE,
+	TOI_CLUSTER_MODE,
+	TOI_BOOT_KERNEL,
+	TOI_DEVICE_HOTPLUG_LOCKED,
+};
+
+#ifdef CONFIG_TOI
+
+/* Used in init dir files */
+extern unsigned long toi_state;
+#define set_toi_state(bit) (set_bit(bit, &toi_state))
+#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
+#define test_toi_state(bit) (test_bit(bit, &toi_state))
+extern int toi_running;
+
+#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
+extern int try_tuxonice_hibernate(void);
+
+#else /* !CONFIG_TOI */
+
+#define toi_state		(0)
+#define set_toi_state(bit) do { } while (0)
+#define clear_toi_state(bit) do { } while (0)
+#define test_toi_state(bit) (0)
+#define toi_running (0)
+
+static inline int try_tuxonice_hibernate(void) { return 0; }
+#define test_action_state(bit) (0)
+
+#endif /* CONFIG_TOI */
+
+#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_TOI
+extern void try_tuxonice_resume(void);
+#else
+#define try_tuxonice_resume() do { } while (0)
+#endif
+
+extern int resume_attempted;
+extern int software_resume(void);
+
+static inline void check_resume_attempted(void)
+{
+	if (resume_attempted)
+		return;
+
+	software_resume();
+}
+#else
+#define check_resume_attempted() do { } while (0)
+#define resume_attempted (0)
+#endif
+
 #ifdef CONFIG_PM_AUTOSLEEP
 
 /* kernel/power/autosleep.c */
diff -Nur linux-4.2/include/linux/swap.h linux-4.2-pck/include/linux/swap.h
--- linux-4.2/include/linux/swap.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/swap.h	2015-09-08 11:20:32.287020763 -0300
@@ -289,6 +289,7 @@
 extern unsigned long totalreserve_pages;
 extern unsigned long dirty_balance_reserve;
 extern unsigned long nr_free_buffer_pages(void);
+extern unsigned long nr_unallocated_buffer_pages(void);
 extern unsigned long nr_free_pagecache_pages(void);
 
 /* Definition of global_page_state not available yet */
@@ -328,6 +329,8 @@
 						struct zone *zone,
 						unsigned long *nr_scanned);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
+extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim,
+		gfp_t mask);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
@@ -428,13 +431,17 @@
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
+extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
+extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int page_swapcount(struct page *);
 extern struct swap_info_struct *page_swap_info(struct page *);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
+extern void get_swap_range_of_type(int type, swp_entry_t *start,
+		swp_entry_t *end, unsigned int limit);
 
 #else /* CONFIG_SWAP */
 
diff -Nur linux-4.2/include/linux/tcp.h linux-4.2-pck/include/linux/tcp.h
--- linux-4.2/include/linux/tcp.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/tcp.h	2015-09-08 00:48:31.501257347 -0300
@@ -19,6 +19,7 @@
 
 
 #include <linux/skbuff.h>
+#include <linux/cryptohash.h>
 #include <net/sock.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -328,6 +329,21 @@
 	struct tcp_md5sig_info	__rcu *md5sig_info;
 #endif
 
+#ifdef CONFIG_TCP_STEALTH
+/* Stealth TCP socket configuration */
+	struct {
+		#define TCP_STEALTH_MODE_AUTH		BIT(0)
+		#define TCP_STEALTH_MODE_INTEGRITY	BIT(1)
+		#define TCP_STEALTH_MODE_INTEGRITY_LEN	BIT(2)
+		u8 mode;
+		u8 secret[MD5_MESSAGE_BYTES];
+		u16 integrity_hash;
+		size_t integrity_len;
+		struct skb_mstamp mstamp;
+		bool saw_tsval;
+	} stealth;
+#endif
+
 /* TCP fastopen related information */
 	struct tcp_fastopen_request *fastopen_req;
 	/* fastopen_rsk points to request_sock that resulted in this big
diff -Nur linux-4.2/include/linux/thinkpad_ec.h linux-4.2-pck/include/linux/thinkpad_ec.h
--- linux-4.2/include/linux/thinkpad_ec.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/linux/thinkpad_ec.h	2015-09-08 00:48:22.228363880 -0300
@@ -0,0 +1,47 @@
+/*
+ *  thinkpad_ec.h - interface to ThinkPad embedded controller LPC3 functions
+ *
+ *  Copyright (C) 2005 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _THINKPAD_EC_H
+#define _THINKPAD_EC_H
+
+#ifdef __KERNEL__
+
+#define TP_CONTROLLER_ROW_LEN 16
+
+/* EC transactions input and output (possibly partial) vectors of 16 bytes. */
+struct thinkpad_ec_row {
+	u16 mask; /* bitmap of which entries of val[] are meaningful */
+	u8 val[TP_CONTROLLER_ROW_LEN];
+};
+
+extern int __must_check thinkpad_ec_lock(void);
+extern int __must_check thinkpad_ec_try_lock(void);
+extern void thinkpad_ec_unlock(void);
+
+extern int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+				struct thinkpad_ec_row *data);
+extern int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+				    struct thinkpad_ec_row *mask);
+extern int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args);
+extern void thinkpad_ec_invalidate(void);
+
+
+#endif /* __KERNEL__ */
+#endif /* _THINKPAD_EC_H */
diff -Nur linux-4.2/include/linux/thread_info.h linux-4.2-pck/include/linux/thread_info.h
--- linux-4.2/include/linux/thread_info.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/linux/thread_info.h	2015-09-08 11:20:32.287020763 -0300
@@ -56,9 +56,9 @@
 #ifdef __KERNEL__
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
-# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
+# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK | ___GFP_TOI_NOTRACK | __GFP_ZERO)
 #else
-# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
+# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK | ___GFP_TOI_NOTRACK)
 #endif
 
 /*
diff -Nur linux-4.2/include/linux/tuxonice.h linux-4.2-pck/include/linux/tuxonice.h
--- linux-4.2/include/linux/tuxonice.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/linux/tuxonice.h	2015-09-08 11:20:32.287020763 -0300
@@ -0,0 +1,48 @@
+/*
+ * include/linux/tuxonice.h
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef INCLUDE_LINUX_TUXONICE_H
+#define INCLUDE_LINUX_TUXONICE_H
+#ifdef CONFIG_TOI_INCREMENTAL
+extern void toi_set_logbuf_untracked(void);
+extern int toi_make_writable(pgd_t *pgd, unsigned long address);
+
+static inline int toi_incremental_support(void)
+{
+    return 1;
+}
+
+/* Copy Before Write */
+struct toi_cbw {
+    unsigned long pfn;
+    void *virt;
+    struct toi_cbw *next;
+};
+
+struct toi_cbw_state {
+    bool active;            /* Is a fault handler running? */
+    bool enabled;           /* Are we doing copy before write? */
+    int size;               /* The number of pages allocated */
+    struct toi_cbw *first, *next, *last;  /* Pointers to the data structure */
+};
+
+#define CBWS_PER_PAGE (PAGE_SIZE / sizeof(struct toi_cbw))
+DECLARE_PER_CPU(struct toi_cbw_state, toi_cbw_states);
+#else
+#define toi_set_logbuf_untracked() do { } while(0)
+static inline int toi_make_writable(pgd_t *pgd, unsigned long addr)
+{
+    return 0;
+}
+
+static inline int toi_incremental_support(void)
+{
+    return 0;
+}
+#endif
+#endif
diff -Nur linux-4.2/include/linux/uksm.h linux-4.2-pck/include/linux/uksm.h
--- linux-4.2/include/linux/uksm.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/linux/uksm.h	2015-09-08 00:51:03.460668141 -0300
@@ -0,0 +1,146 @@
+#ifndef __LINUX_UKSM_H
+#define __LINUX_UKSM_H
+/*
+ * Memory merging support.
+ *
+ * This code enables dynamic sharing of identical pages found in different
+ * memory areas, even if they are not shared by fork().
+ */
+
+/* if !CONFIG_UKSM this file should not be compiled at all. */
+#ifdef CONFIG_UKSM
+
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/sched.h>
+
+extern unsigned long zero_pfn __read_mostly;
+extern unsigned long uksm_zero_pfn __read_mostly;
+extern struct page *empty_uksm_zero_page;
+
+/* must be done before linked to mm */
+extern void uksm_vma_add_new(struct vm_area_struct *vma);
+extern void uksm_remove_vma(struct vm_area_struct *vma);
+
+#define UKSM_SLOT_NEED_SORT	(1 << 0)
+#define UKSM_SLOT_NEED_RERAND 	(1 << 1)
+#define UKSM_SLOT_SCANNED     	(1 << 2) /* It's scanned in this round */
+#define UKSM_SLOT_FUL_SCANNED 	(1 << 3)
+#define UKSM_SLOT_IN_UKSM 	(1 << 4)
+
+struct vma_slot {
+	struct sradix_tree_node *snode;
+	unsigned long sindex;
+
+	struct list_head slot_list;
+	unsigned long fully_scanned_round;
+	unsigned long dedup_num;
+	unsigned long pages_scanned;
+	unsigned long last_scanned;
+	unsigned long pages_to_scan;
+	struct scan_rung *rung;
+	struct page **rmap_list_pool;
+	unsigned int *pool_counts;
+	unsigned long pool_size;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	unsigned long ctime_j;
+	unsigned long pages;
+	unsigned long flags;
+	unsigned long pages_cowed; /* pages cowed this round */
+	unsigned long pages_merged; /* pages merged this round */
+	unsigned long pages_bemerged;
+
+	/* when it has page merged in this eval round */
+	struct list_head dedup_list;
+};
+
+static inline void uksm_unmap_zero_page(pte_t pte)
+{
+	if (pte_pfn(pte) == uksm_zero_pfn)
+		__dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
+}
+
+static inline void uksm_map_zero_page(pte_t pte)
+{
+	if (pte_pfn(pte) == uksm_zero_pfn)
+		__inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
+}
+
+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
+{
+	if (vma->uksm_vma_slot && PageKsm(page))
+		vma->uksm_vma_slot->pages_cowed++;
+}
+
+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
+{
+	if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn)
+		vma->uksm_vma_slot->pages_cowed++;
+}
+
+static inline int uksm_flags_can_scan(unsigned long vm_flags)
+{
+#ifndef VM_SAO
+#define VM_SAO 0
+#endif
+	return !(vm_flags & (VM_PFNMAP | VM_IO  | VM_DONTEXPAND |
+			     VM_HUGETLB | VM_MIXEDMAP | VM_SHARED
+			     | VM_MAYSHARE | VM_GROWSUP | VM_GROWSDOWN | VM_SAO));
+}
+
+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
+{
+	if (uksm_flags_can_scan(*vm_flags_p))
+		*vm_flags_p |= VM_MERGEABLE;
+}
+
+/*
+ * Just a BUG_ON wrapper for places where the ksm zero page must not appear.
+ * TODO: remove once the uksm zero page patch is stable enough.
+ */
+static inline void uksm_bugon_zeropage(pte_t pte)
+{
+	BUG_ON(pte_pfn(pte) == uksm_zero_pfn);
+}
+#else
+static inline void uksm_vma_add_new(struct vm_area_struct *vma)
+{
+}
+
+static inline void uksm_remove_vma(struct vm_area_struct *vma)
+{
+}
+
+static inline void uksm_unmap_zero_page(pte_t pte)
+{
+}
+
+static inline void uksm_map_zero_page(pte_t pte)
+{
+}
+
+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
+{
+}
+
+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
+{
+}
+
+static inline int uksm_flags_can_scan(unsigned long vm_flags)
+{
+	return 0;
+}
+
+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
+{
+}
+
+static inline void uksm_bugon_zeropage(pte_t pte)
+{
+}
+#endif /* !CONFIG_UKSM */
+#endif /* __LINUX_UKSM_H */
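
The helpers above are intended to be called from core mm code, but those call sites appear elsewhere in the patch. The following sketch is illustrative only and not part of the patch: the function name is hypothetical, and the call order (modify the flags, then register the VMA) is an assumption based on the "must be done before linked to mm" comment above.

	/* Illustrative sketch only -- not part of the patch. */
	static void example_setup_mergeable_vma(struct vm_area_struct *vma)
	{
		unsigned long flags = vma->vm_flags;

		uksm_vm_flags_mod(&flags);	/* sets VM_MERGEABLE if the VMA is scannable */
		vma->vm_flags = flags;

		uksm_vma_add_new(vma);		/* register with the scanner before linking to mm */
	}
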
diff -Nur linux-4.2/include/net/netfilter/nf_conntrack.h linux-4.2-pck/include/net/netfilter/nf_conntrack.h
--- linux-4.2/include/net/netfilter/nf_conntrack.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/net/netfilter/nf_conntrack.h	2015-09-08 00:48:22.228363880 -0300
@@ -292,6 +292,7 @@
 void init_nf_conntrack_hash_rnd(void);
 
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
+void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
diff -Nur linux-4.2/include/net/secure_seq.h linux-4.2-pck/include/net/secure_seq.h
--- linux-4.2/include/net/secure_seq.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/net/secure_seq.h	2015-09-08 00:48:31.501257347 -0300
@@ -14,5 +14,10 @@
 				__be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				  __be16 sport, __be16 dport);
+#ifdef CONFIG_TCP_STEALTH
+u32 tcp_stealth_do_auth(struct sock *sk, struct sk_buff *skb);
+u32 tcp_stealth_sequence_number(struct sock *sk, __be32 *daddr,
+				u32 daddr_size, __be16 dport);
+#endif
 
 #endif /* _NET_SECURE_SEQ */
diff -Nur linux-4.2/include/net/tcp.h linux-4.2-pck/include/net/tcp.h
--- linux-4.2/include/net/tcp.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/net/tcp.h	2015-09-08 00:48:31.501257347 -0300
@@ -440,6 +440,12 @@
 		       struct tcp_options_received *opt_rx,
 		       int estab, struct tcp_fastopen_cookie *foc);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
+#ifdef CONFIG_TCP_STEALTH
+const bool tcp_parse_tsval_option(u32 *tsval, const struct tcphdr *th);
+int tcp_stealth_integrity(u16 *hash, u8 *secret, u8 *payload, int len);
+#define be32_isn_to_be16_av(x)	(((__be16 *)&x)[0])
+#define be32_isn_to_be16_ih(x)	(((__be16 *)&x)[1])
+#endif
 
 /*
  *	TCP v4 functions exported for the inet6 API
diff -Nur linux-4.2/include/trace/events/fs.h linux-4.2-pck/include/trace/events/fs.h
--- linux-4.2/include/trace/events/fs.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/trace/events/fs.h	2015-09-08 00:48:22.228363880 -0300
@@ -0,0 +1,53 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs
+
+#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_H
+
+#include <linux/fs.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(do_sys_open,
+
+	TP_PROTO(const char *filename, int flags, int mode),
+
+	TP_ARGS(filename, flags, mode),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+		__field(	int, flags			)
+		__field(	int, mode			)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+		__entry->flags = flags;
+		__entry->mode = mode;
+	),
+
+	TP_printk("\"%s\" %x %o",
+		  __get_str(filename), __entry->flags, __entry->mode)
+);
+
+TRACE_EVENT(open_exec,
+
+	TP_PROTO(const char *filename),
+
+	TP_ARGS(filename),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+	),
+
+	TP_printk("\"%s\"",
+		  __get_str(filename))
+);
+
+#endif /* _TRACE_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
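
TRACE_EVENT() above only defines the two tracepoints; the fs-side call sites are added elsewhere in the patch. As a hedged reminder of how the generated API is used: trace_do_sys_open() and trace_open_exec() are the functions TRACE_EVENT() generates, exactly one compilation unit must include the header with CREATE_TRACE_POINTS defined, and the wrapper function below is hypothetical.

	/* Illustrative sketch only -- instantiate the tracepoints in one .c file. */
	#define CREATE_TRACE_POINTS
	#include <trace/events/fs.h>

	static long example_traced_open(const char *filename, int flags, int mode)
	{
		trace_do_sys_open(filename, flags, mode);	/* emits "<name>" <flags> <mode> */
		/* ... the real open path would follow here ... */
		return 0;
	}
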
diff -Nur linux-4.2/include/uapi/linux/Kbuild linux-4.2-pck/include/uapi/linux/Kbuild
--- linux-4.2/include/uapi/linux/Kbuild	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/Kbuild	2015-09-08 00:48:22.228363880 -0300
@@ -216,6 +216,7 @@
 header-y += jffs2.h
 header-y += joystick.h
 header-y += kcmp.h
+header-y += kdbus.h
 header-y += kdev_t.h
 header-y += kd.h
 header-y += kernelcapi.h
diff -Nur linux-4.2/include/uapi/linux/kdbus.h linux-4.2-pck/include/uapi/linux/kdbus.h
--- linux-4.2/include/uapi/linux/kdbus.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/kdbus.h	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,984 @@
+/*
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef _UAPI_KDBUS_H_
+#define _UAPI_KDBUS_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define KDBUS_IOCTL_MAGIC		0x95
+#define KDBUS_SRC_ID_KERNEL		(0)
+#define KDBUS_DST_ID_NAME		(0)
+#define KDBUS_MATCH_ID_ANY		(~0ULL)
+#define KDBUS_DST_ID_BROADCAST		(~0ULL)
+#define KDBUS_FLAG_NEGOTIATE		(1ULL << 63)
+
+/**
+ * struct kdbus_notify_id_change - name registry change message
+ * @id:			New or former owner of the name
+ * @flags:		flags field from KDBUS_HELLO_*
+ *
+ * Sent from kernel to userspace when the owner or activator of
+ * a well-known name changes.
+ *
+ * Attached to:
+ *   KDBUS_ITEM_ID_ADD
+ *   KDBUS_ITEM_ID_REMOVE
+ */
+struct kdbus_notify_id_change {
+	__u64 id;
+	__u64 flags;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_notify_name_change - name registry change message
+ * @old_id:		ID and flags of former owner of a name
+ * @new_id:		ID and flags of new owner of a name
+ * @name:		Well-known name
+ *
+ * Sent from kernel to userspace when the owner or activator of
+ * a well-known name changes.
+ *
+ * Attached to:
+ *   KDBUS_ITEM_NAME_ADD
+ *   KDBUS_ITEM_NAME_REMOVE
+ *   KDBUS_ITEM_NAME_CHANGE
+ */
+struct kdbus_notify_name_change {
+	struct kdbus_notify_id_change old_id;
+	struct kdbus_notify_id_change new_id;
+	char name[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_creds - process credentials
+ * @uid:		User ID
+ * @euid:		Effective UID
+ * @suid:		Saved UID
+ * @fsuid:		Filesystem UID
+ * @gid:		Group ID
+ * @egid:		Effective GID
+ * @sgid:		Saved GID
+ * @fsgid:		Filesystem GID
+ *
+ * Attached to:
+ *   KDBUS_ITEM_CREDS
+ */
+struct kdbus_creds {
+	__u64 uid;
+	__u64 euid;
+	__u64 suid;
+	__u64 fsuid;
+	__u64 gid;
+	__u64 egid;
+	__u64 sgid;
+	__u64 fsgid;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_pids - process identifiers
+ * @pid:		Process ID
+ * @tid:		Thread ID
+ * @ppid:		Parent process ID
+ *
+ * The PID and TID of a process.
+ *
+ * Attached to:
+ *   KDBUS_ITEM_PIDS
+ */
+struct kdbus_pids {
+	__u64 pid;
+	__u64 tid;
+	__u64 ppid;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_caps - process capabilities
+ * @last_cap:	Highest currently known capability bit
+ * @caps:	Variable number of 32-bit capabilities flags
+ *
+ * Contains a variable number of 32-bit capabilities flags.
+ *
+ * Attached to:
+ *   KDBUS_ITEM_CAPS
+ */
+struct kdbus_caps {
+	__u32 last_cap;
+	__u32 caps[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_audit - audit information
+ * @sessionid:		The audit session ID
+ * @loginuid:		The audit login uid
+ *
+ * Attached to:
+ *   KDBUS_ITEM_AUDIT
+ */
+struct kdbus_audit {
+	__u32 sessionid;
+	__u32 loginuid;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_timestamp
+ * @seqnum:		Global per-domain message sequence number
+ * @monotonic_ns:	Monotonic timestamp, in nanoseconds
+ * @realtime_ns:	Realtime timestamp, in nanoseconds
+ *
+ * Attached to:
+ *   KDBUS_ITEM_TIMESTAMP
+ */
+struct kdbus_timestamp {
+	__u64 seqnum;
+	__u64 monotonic_ns;
+	__u64 realtime_ns;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_vec - I/O vector for kdbus payload items
+ * @size:		The size of the vector
+ * @address:		Memory address of data buffer
+ * @offset:		Offset in the in-message payload memory,
+ *			relative to the message head
+ *
+ * Attached to:
+ *   KDBUS_ITEM_PAYLOAD_VEC, KDBUS_ITEM_PAYLOAD_OFF
+ */
+struct kdbus_vec {
+	__u64 size;
+	union {
+		__u64 address;
+		__u64 offset;
+	};
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_bloom_parameter - bus-wide bloom parameters
+ * @size:		Size of the bit field in bytes (m / 8)
+ * @n_hash:		Number of hash functions used (k)
+ */
+struct kdbus_bloom_parameter {
+	__u64 size;
+	__u64 n_hash;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_bloom_filter - bloom filter containing n elements
+ * @generation:		Generation of the element set in the filter
+ * @data:		Bit field, multiple of 8 bytes
+ */
+struct kdbus_bloom_filter {
+	__u64 generation;
+	__u64 data[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_memfd - a kdbus memfd
+ * @start:		The offset into the memfd where the segment starts
+ * @size:		The size of the memfd segment
+ * @fd:			The file descriptor number
+ * @__pad:		Padding to ensure proper alignment and size
+ *
+ * Attached to:
+ *   KDBUS_ITEM_PAYLOAD_MEMFD
+ */
+struct kdbus_memfd {
+	__u64 start;
+	__u64 size;
+	int fd;
+	__u32 __pad;
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_name - a registered well-known name with its flags
+ * @flags:		Flags from KDBUS_NAME_*
+ * @name:		Well-known name
+ *
+ * Attached to:
+ *   KDBUS_ITEM_OWNED_NAME
+ */
+struct kdbus_name {
+	__u64 flags;
+	char name[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_policy_access_type - permissions of a policy record
+ * @_KDBUS_POLICY_ACCESS_NULL:	Uninitialized/invalid
+ * @KDBUS_POLICY_ACCESS_USER:	Grant access to a uid
+ * @KDBUS_POLICY_ACCESS_GROUP:	Grant access to gid
+ * @KDBUS_POLICY_ACCESS_WORLD:	World-accessible
+ */
+enum kdbus_policy_access_type {
+	_KDBUS_POLICY_ACCESS_NULL,
+	KDBUS_POLICY_ACCESS_USER,
+	KDBUS_POLICY_ACCESS_GROUP,
+	KDBUS_POLICY_ACCESS_WORLD,
+};
+
+/**
+ * enum kdbus_policy_type - access mode flags
+ * @KDBUS_POLICY_OWN:		Allow to own a well-known name
+ *				Implies KDBUS_POLICY_TALK and KDBUS_POLICY_SEE
+ * @KDBUS_POLICY_TALK:		Allow communication to a well-known name
+ *				Implies KDBUS_POLICY_SEE
+ * @KDBUS_POLICY_SEE:		Allow to see a well-known name
+ */
+enum kdbus_policy_type {
+	KDBUS_POLICY_SEE	= 0,
+	KDBUS_POLICY_TALK,
+	KDBUS_POLICY_OWN,
+};
+
+/**
+ * struct kdbus_policy_access - policy access item
+ * @type:		One of KDBUS_POLICY_ACCESS_* types
+ * @access:		Access to grant
+ * @id:			For KDBUS_POLICY_ACCESS_USER, the uid
+ *			For KDBUS_POLICY_ACCESS_GROUP, the gid
+ */
+struct kdbus_policy_access {
+	__u64 type;	/* USER, GROUP, WORLD */
+	__u64 access;	/* OWN, TALK, SEE */
+	__u64 id;	/* uid, gid, 0 */
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_attach_flags - flags for metadata attachments
+ * @KDBUS_ATTACH_TIMESTAMP:		Timestamp
+ * @KDBUS_ATTACH_CREDS:			Credentials
+ * @KDBUS_ATTACH_PIDS:			PIDs
+ * @KDBUS_ATTACH_AUXGROUPS:		Auxiliary groups
+ * @KDBUS_ATTACH_NAMES:			Well-known names
+ * @KDBUS_ATTACH_TID_COMM:		The "comm" process identifier of the TID
+ * @KDBUS_ATTACH_PID_COMM:		The "comm" process identifier of the PID
+ * @KDBUS_ATTACH_EXE:			The path of the executable
+ * @KDBUS_ATTACH_CMDLINE:		The process command line
+ * @KDBUS_ATTACH_CGROUP:		The cgroup membership
+ * @KDBUS_ATTACH_CAPS:			The process capabilities
+ * @KDBUS_ATTACH_SECLABEL:		The security label
+ * @KDBUS_ATTACH_AUDIT:			The audit IDs
+ * @KDBUS_ATTACH_CONN_DESCRIPTION:	The human-readable connection name
+ * @_KDBUS_ATTACH_ALL:			All of the above
+ * @_KDBUS_ATTACH_ANY:			Wildcard match to enable any kind of
+ *					metadata.
+ */
+enum kdbus_attach_flags {
+	KDBUS_ATTACH_TIMESTAMP		=  1ULL <<  0,
+	KDBUS_ATTACH_CREDS		=  1ULL <<  1,
+	KDBUS_ATTACH_PIDS		=  1ULL <<  2,
+	KDBUS_ATTACH_AUXGROUPS		=  1ULL <<  3,
+	KDBUS_ATTACH_NAMES		=  1ULL <<  4,
+	KDBUS_ATTACH_TID_COMM		=  1ULL <<  5,
+	KDBUS_ATTACH_PID_COMM		=  1ULL <<  6,
+	KDBUS_ATTACH_EXE		=  1ULL <<  7,
+	KDBUS_ATTACH_CMDLINE		=  1ULL <<  8,
+	KDBUS_ATTACH_CGROUP		=  1ULL <<  9,
+	KDBUS_ATTACH_CAPS		=  1ULL << 10,
+	KDBUS_ATTACH_SECLABEL		=  1ULL << 11,
+	KDBUS_ATTACH_AUDIT		=  1ULL << 12,
+	KDBUS_ATTACH_CONN_DESCRIPTION	=  1ULL << 13,
+	_KDBUS_ATTACH_ALL		=  (1ULL << 14) - 1,
+	_KDBUS_ATTACH_ANY		=  ~0ULL
+};
+
+/**
+ * enum kdbus_item_type - item types to chain data in a list
+ * @_KDBUS_ITEM_NULL:			Uninitialized/invalid
+ * @_KDBUS_ITEM_USER_BASE:		Start of user items
+ * @KDBUS_ITEM_NEGOTIATE:		Negotiate supported items
+ * @KDBUS_ITEM_PAYLOAD_VEC:		Vector to data
+ * @KDBUS_ITEM_PAYLOAD_OFF:		Data at returned offset to message head
+ * @KDBUS_ITEM_PAYLOAD_MEMFD:		Data as sealed memfd
+ * @KDBUS_ITEM_FDS:			Attached file descriptors
+ * @KDBUS_ITEM_CANCEL_FD:		FD used to cancel a synchronous
+ *					operation by writing to it from
+ *					userspace
+ * @KDBUS_ITEM_BLOOM_PARAMETER:		Bus-wide bloom parameters, used with
+ *					KDBUS_CMD_BUS_MAKE, carries a
+ *					struct kdbus_bloom_parameter
+ * @KDBUS_ITEM_BLOOM_FILTER:		Bloom filter carried with a message,
+ *					used to match against a bloom mask of a
+ *					connection, carries a struct
+ *					kdbus_bloom_filter
+ * @KDBUS_ITEM_BLOOM_MASK:		Bloom mask used to match against a
+ *					message's bloom filter
+ * @KDBUS_ITEM_DST_NAME:		Destination's well-known name
+ * @KDBUS_ITEM_MAKE_NAME:		Name of domain, bus, endpoint
+ * @KDBUS_ITEM_ATTACH_FLAGS_SEND:	Attach-flags, used for updating which
+ *					metadata a connection opts in to send
+ * @KDBUS_ITEM_ATTACH_FLAGS_RECV:	Attach-flags, used for updating which
+ *					metadata a connection requests to
+ *					receive for each received message
+ * @KDBUS_ITEM_ID:			Connection ID
+ * @KDBUS_ITEM_NAME:			Well-known name with flags
+ * @_KDBUS_ITEM_ATTACH_BASE:		Start of metadata attach items
+ * @KDBUS_ITEM_TIMESTAMP:		Timestamp
+ * @KDBUS_ITEM_CREDS:			Process credentials
+ * @KDBUS_ITEM_PIDS:			Process identifiers
+ * @KDBUS_ITEM_AUXGROUPS:		Auxiliary process groups
+ * @KDBUS_ITEM_OWNED_NAME:		A name owned by the associated
+ *					connection
+ * @KDBUS_ITEM_TID_COMM:		Thread ID "comm" identifier
+ *					(Don't trust this, see below.)
+ * @KDBUS_ITEM_PID_COMM:		Process ID "comm" identifier
+ *					(Don't trust this, see below.)
+ * @KDBUS_ITEM_EXE:			The path of the executable
+ *					(Don't trust this, see below.)
+ * @KDBUS_ITEM_CMDLINE:			The process command line
+ *					(Don't trust this, see below.)
+ * @KDBUS_ITEM_CGROUP:			The cgroup membership
+ * @KDBUS_ITEM_CAPS:			The process capabilities
+ * @KDBUS_ITEM_SECLABEL:		The security label
+ * @KDBUS_ITEM_AUDIT:			The audit IDs
+ * @KDBUS_ITEM_CONN_DESCRIPTION:	The connection's human-readable name
+ *					(debugging)
+ * @_KDBUS_ITEM_POLICY_BASE:		Start of policy items
+ * @KDBUS_ITEM_POLICY_ACCESS:		Policy access block
+ * @_KDBUS_ITEM_KERNEL_BASE:		Start of kernel-generated message items
+ * @KDBUS_ITEM_NAME_ADD:		Notification in kdbus_notify_name_change
+ * @KDBUS_ITEM_NAME_REMOVE:		Notification in kdbus_notify_name_change
+ * @KDBUS_ITEM_NAME_CHANGE:		Notification in kdbus_notify_name_change
+ * @KDBUS_ITEM_ID_ADD:			Notification in kdbus_notify_id_change
+ * @KDBUS_ITEM_ID_REMOVE:		Notification in kdbus_notify_id_change
+ * @KDBUS_ITEM_REPLY_TIMEOUT:		Timeout has been reached
+ * @KDBUS_ITEM_REPLY_DEAD:		Destination died
+ *
+ * N.B: The process and thread COMM fields, as well as the CMDLINE and
+ * EXE fields may be altered by unprivileged processes and should
+ * hence *not* be used for security decisions. Peers should make use of
+ * these items only for informational purposes, such as generating log
+ * records.
+ */
+enum kdbus_item_type {
+	_KDBUS_ITEM_NULL,
+	_KDBUS_ITEM_USER_BASE,
+	KDBUS_ITEM_NEGOTIATE	= _KDBUS_ITEM_USER_BASE,
+	KDBUS_ITEM_PAYLOAD_VEC,
+	KDBUS_ITEM_PAYLOAD_OFF,
+	KDBUS_ITEM_PAYLOAD_MEMFD,
+	KDBUS_ITEM_FDS,
+	KDBUS_ITEM_CANCEL_FD,
+	KDBUS_ITEM_BLOOM_PARAMETER,
+	KDBUS_ITEM_BLOOM_FILTER,
+	KDBUS_ITEM_BLOOM_MASK,
+	KDBUS_ITEM_DST_NAME,
+	KDBUS_ITEM_MAKE_NAME,
+	KDBUS_ITEM_ATTACH_FLAGS_SEND,
+	KDBUS_ITEM_ATTACH_FLAGS_RECV,
+	KDBUS_ITEM_ID,
+	KDBUS_ITEM_NAME,
+	KDBUS_ITEM_DST_ID,
+
+	/* keep these item types in sync with KDBUS_ATTACH_* flags */
+	_KDBUS_ITEM_ATTACH_BASE	= 0x1000,
+	KDBUS_ITEM_TIMESTAMP	= _KDBUS_ITEM_ATTACH_BASE,
+	KDBUS_ITEM_CREDS,
+	KDBUS_ITEM_PIDS,
+	KDBUS_ITEM_AUXGROUPS,
+	KDBUS_ITEM_OWNED_NAME,
+	KDBUS_ITEM_TID_COMM,
+	KDBUS_ITEM_PID_COMM,
+	KDBUS_ITEM_EXE,
+	KDBUS_ITEM_CMDLINE,
+	KDBUS_ITEM_CGROUP,
+	KDBUS_ITEM_CAPS,
+	KDBUS_ITEM_SECLABEL,
+	KDBUS_ITEM_AUDIT,
+	KDBUS_ITEM_CONN_DESCRIPTION,
+
+	_KDBUS_ITEM_POLICY_BASE	= 0x2000,
+	KDBUS_ITEM_POLICY_ACCESS = _KDBUS_ITEM_POLICY_BASE,
+
+	_KDBUS_ITEM_KERNEL_BASE	= 0x8000,
+	KDBUS_ITEM_NAME_ADD	= _KDBUS_ITEM_KERNEL_BASE,
+	KDBUS_ITEM_NAME_REMOVE,
+	KDBUS_ITEM_NAME_CHANGE,
+	KDBUS_ITEM_ID_ADD,
+	KDBUS_ITEM_ID_REMOVE,
+	KDBUS_ITEM_REPLY_TIMEOUT,
+	KDBUS_ITEM_REPLY_DEAD,
+};
+
+/**
+ * struct kdbus_item - chain of data blocks
+ * @size:		Overall data record size
+ * @type:		Kdbus_item type of data
+ * @data:		Generic bytes
+ * @data32:		Generic 32 bit array
+ * @data64:		Generic 64 bit array
+ * @str:		Generic string
+ * @id:			Connection ID
+ * @vec:		KDBUS_ITEM_PAYLOAD_VEC
+ * @creds:		KDBUS_ITEM_CREDS
+ * @audit:		KDBUS_ITEM_AUDIT
+ * @timestamp:		KDBUS_ITEM_TIMESTAMP
+ * @name:		KDBUS_ITEM_NAME
+ * @bloom_parameter:	KDBUS_ITEM_BLOOM_PARAMETER
+ * @bloom_filter:	KDBUS_ITEM_BLOOM_FILTER
+ * @memfd:		KDBUS_ITEM_PAYLOAD_MEMFD
+ * @name_change:	KDBUS_ITEM_NAME_ADD
+ *			KDBUS_ITEM_NAME_REMOVE
+ *			KDBUS_ITEM_NAME_CHANGE
+ * @id_change:		KDBUS_ITEM_ID_ADD
+ *			KDBUS_ITEM_ID_REMOVE
+ * @policy_access:	KDBUS_ITEM_POLICY_ACCESS
+ */
+struct kdbus_item {
+	__u64 size;
+	__u64 type;
+	union {
+		__u8 data[0];
+		__u32 data32[0];
+		__u64 data64[0];
+		char str[0];
+
+		__u64 id;
+		struct kdbus_vec vec;
+		struct kdbus_creds creds;
+		struct kdbus_pids pids;
+		struct kdbus_audit audit;
+		struct kdbus_caps caps;
+		struct kdbus_timestamp timestamp;
+		struct kdbus_name name;
+		struct kdbus_bloom_parameter bloom_parameter;
+		struct kdbus_bloom_filter bloom_filter;
+		struct kdbus_memfd memfd;
+		int fds[0];
+		struct kdbus_notify_name_change name_change;
+		struct kdbus_notify_id_change id_change;
+		struct kdbus_policy_access policy_access;
+	};
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_msg_flags - type of message
+ * @KDBUS_MSG_EXPECT_REPLY:	Expect a reply message, used for
+ *				method calls. The userspace-supplied
+ *				cookie identifies the message and the
+ *				respective reply carries the cookie
+ *				in cookie_reply
+ * @KDBUS_MSG_NO_AUTO_START:	Do not start a service if the addressed
+ *				name is not currently active. This flag is
+ *				not looked at by the kernel but only
+ *				serves as hint for userspace implementations.
+ * @KDBUS_MSG_SIGNAL:		Treat this message as signal
+ */
+enum kdbus_msg_flags {
+	KDBUS_MSG_EXPECT_REPLY	= 1ULL << 0,
+	KDBUS_MSG_NO_AUTO_START	= 1ULL << 1,
+	KDBUS_MSG_SIGNAL	= 1ULL << 2,
+};
+
+/**
+ * enum kdbus_payload_type - type of payload carried by message
+ * @KDBUS_PAYLOAD_KERNEL:	Kernel-generated simple message
+ * @KDBUS_PAYLOAD_DBUS:		D-Bus marshalling "DBusDBus"
+ *
+ * Any payload-type is accepted. Common types will get added here once
+ * established.
+ */
+enum kdbus_payload_type {
+	KDBUS_PAYLOAD_KERNEL,
+	KDBUS_PAYLOAD_DBUS	= 0x4442757344427573ULL,
+};
+
+/**
+ * struct kdbus_msg - the representation of a kdbus message
+ * @size:		Total size of the message
+ * @flags:		Message flags (KDBUS_MSG_*), userspace → kernel
+ * @priority:		Message queue priority value
+ * @dst_id:		64-bit ID of the destination connection
+ * @src_id:		64-bit ID of the source connection
+ * @payload_type:	Payload type (KDBUS_PAYLOAD_*)
+ * @cookie:		Userspace-supplied cookie, for the connection
+ *			to identify its messages
+ * @timeout_ns:		The time to wait for a message reply from the peer.
+ *			If there is no reply, and the send command is
+ *			executed asynchronously, a kernel-generated message
+ *			with an attached KDBUS_ITEM_REPLY_TIMEOUT item
+ *			is sent to @src_id. For a synchronously executed send
+ *			command, the value denotes the maximum time the call
+ *			blocks to wait for a reply. The timeout is expected in
+ *			nanoseconds and as an absolute CLOCK_MONOTONIC value.
+ * @cookie_reply:	A reply to the requesting message with the same
+ *			cookie. The requesting connection can match its
+ *			request and the reply with this value
+ * @items:		A list of kdbus_items containing the message payload
+ */
+struct kdbus_msg {
+	__u64 size;
+	__u64 flags;
+	__s64 priority;
+	__u64 dst_id;
+	__u64 src_id;
+	__u64 payload_type;
+	__u64 cookie;
+	union {
+		__u64 timeout_ns;
+		__u64 cookie_reply;
+	};
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_msg_info - returned message container
+ * @offset:		Offset of kdbus_msg slice in pool
+ * @msg_size:		Copy of the kdbus_msg.size field
+ * @return_flags:	Command return flags, kernel → userspace
+ */
+struct kdbus_msg_info {
+	__u64 offset;
+	__u64 msg_size;
+	__u64 return_flags;
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_send_flags - flags for sending messages
+ * @KDBUS_SEND_SYNC_REPLY:	Wait for destination connection to
+ *				reply to this message. The
+ *				KDBUS_CMD_SEND ioctl() will block
+ *				until the reply is received, and
+ *				reply in struct kdbus_cmd_send will
+ *				yield the offset in the sender's pool
+ *				where the reply can be found.
+ *				This flag is only valid if
+ *				@KDBUS_MSG_EXPECT_REPLY is set as well.
+ */
+enum kdbus_send_flags {
+	KDBUS_SEND_SYNC_REPLY		= 1ULL << 0,
+};
+
+/**
+ * struct kdbus_cmd_send - send message
+ * @size:		Overall size of this structure
+ * @flags:		Flags to change send behavior (KDBUS_SEND_*)
+ * @return_flags:	Command return flags, kernel → userspace
+ * @msg_address:	Storage address of the kdbus_msg to send
+ * @reply:		Storage for message reply if KDBUS_SEND_SYNC_REPLY
+ *			was given
+ * @items:		Additional items for this command
+ */
+struct kdbus_cmd_send {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 msg_address;
+	struct kdbus_msg_info reply;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_recv_flags - flags for de-queuing messages
+ * @KDBUS_RECV_PEEK:		Return the next queued message without
+ *				actually de-queuing it, and without installing
+ *				any file descriptors or other resources. It is
+ *				usually used to determine the activating
+ *				connection of a bus name.
+ * @KDBUS_RECV_DROP:		Drop and free the next queued message and all
+ *				its resources without actually receiving it.
+ * @KDBUS_RECV_USE_PRIORITY:	Only de-queue messages with the specified or
+ *				higher priority (lowest values); if not set,
+ *				the priority value is ignored.
+ */
+enum kdbus_recv_flags {
+	KDBUS_RECV_PEEK		= 1ULL <<  0,
+	KDBUS_RECV_DROP		= 1ULL <<  1,
+	KDBUS_RECV_USE_PRIORITY	= 1ULL <<  2,
+};
+
+/**
+ * enum kdbus_recv_return_flags - return flags for message receive commands
+ * @KDBUS_RECV_RETURN_INCOMPLETE_FDS:	One or more file descriptors could not
+ *					be installed. These descriptors in
+ *					KDBUS_ITEM_FDS will carry the value -1.
+ * @KDBUS_RECV_RETURN_DROPPED_MSGS:	There have been dropped messages since
+ *					the last time a message was received.
+ *					The 'dropped_msgs' counter contains the
+ *					number of messages dropped due to pool
+ *					overflows or other missed broadcasts.
+ */
+enum kdbus_recv_return_flags {
+	KDBUS_RECV_RETURN_INCOMPLETE_FDS	= 1ULL <<  0,
+	KDBUS_RECV_RETURN_DROPPED_MSGS		= 1ULL <<  1,
+};
+
+/**
+ * struct kdbus_cmd_recv - struct to de-queue a buffered message
+ * @size:		Overall size of this object
+ * @flags:		KDBUS_RECV_* flags, userspace → kernel
+ * @return_flags:	Command return flags, kernel → userspace
+ * @priority:		Minimum priority of the messages to de-queue. Lowest
+ *			values have the highest priority.
+ * @dropped_msgs:	In case there were any dropped messages since the last
+ *			time a message was received, this will be set to the
+ *			number of lost messages and
+ *			KDBUS_RECV_RETURN_DROPPED_MSGS will be set in
+ *			'return_flags'. This can only happen if the ioctl
+ *			returns 0 or EAGAIN.
+ * @msg:		Return storage for received message.
+ * @items:		Additional items for this command.
+ *
+ * This struct is used with the KDBUS_CMD_RECV ioctl.
+ */
+struct kdbus_cmd_recv {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__s64 priority;
+	__u64 dropped_msgs;
+	struct kdbus_msg_info msg;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_cmd_free - struct to free a slice of memory in the pool
+ * @size:		Overall size of this structure
+ * @flags:		Flags for the free command, userspace → kernel
+ * @return_flags:	Command return flags, kernel → userspace
+ * @offset:		The offset of the memory slice, as returned by other
+ *			ioctls
+ * @items:		Additional items to modify the behavior
+ *
+ * This struct is used with the KDBUS_CMD_FREE ioctl.
+ */
+struct kdbus_cmd_free {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 offset;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_hello_flags - flags for struct kdbus_cmd_hello
+ * @KDBUS_HELLO_ACCEPT_FD:	The connection allows the reception of
+ *				any passed file descriptors
+ * @KDBUS_HELLO_ACTIVATOR:	Special-purpose connection which registers
+ *				a well-known name for a process to be started
+ *				when traffic arrives
+ * @KDBUS_HELLO_POLICY_HOLDER:	Special-purpose connection which registers
+ *				policy entries for a name. The provided name
+ *				is not activated and not registered with the
+ *				name database, it only allows unprivileged
+ *				connections to acquire a name, talk or discover
+ *				a service
+ * @KDBUS_HELLO_MONITOR:	Special-purpose connection to monitor
+ *				bus traffic
+ */
+enum kdbus_hello_flags {
+	KDBUS_HELLO_ACCEPT_FD		=  1ULL <<  0,
+	KDBUS_HELLO_ACTIVATOR		=  1ULL <<  1,
+	KDBUS_HELLO_POLICY_HOLDER	=  1ULL <<  2,
+	KDBUS_HELLO_MONITOR		=  1ULL <<  3,
+};
+
+/**
+ * struct kdbus_cmd_hello - struct to say hello to kdbus
+ * @size:		The total size of the structure
+ * @flags:		Connection flags (KDBUS_HELLO_*), userspace → kernel
+ * @return_flags:	Command return flags, kernel → userspace
+ * @attach_flags_send:	Mask of metadata to attach to each message sent
+ *			off by this connection (KDBUS_ATTACH_*)
+ * @attach_flags_recv:	Mask of metadata to attach to each message received
+ *			by the new connection (KDBUS_ATTACH_*)
+ * @bus_flags:		The flags field copied verbatim from the original
+ *			KDBUS_CMD_BUS_MAKE ioctl. It's intended to be useful
+ *			to do negotiation of features of the payload that is
+ *			transferred (kernel → userspace)
+ * @id:			The ID of this connection (kernel → userspace)
+ * @pool_size:		Size of the connection's buffer where the received
+ *			messages are placed
+ * @offset:		Pool offset where items are returned to report
+ *			additional information about the bus and the newly
+ *			created connection.
+ * @items_size:		Size of buffer returned in the pool slice at @offset.
+ * @id128:		Unique 128-bit ID of the bus (kernel → userspace)
+ * @items:		A list of items
+ *
+ * This struct is used with the KDBUS_CMD_HELLO ioctl.
+ */
+struct kdbus_cmd_hello {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 attach_flags_send;
+	__u64 attach_flags_recv;
+	__u64 bus_flags;
+	__u64 id;
+	__u64 pool_size;
+	__u64 offset;
+	__u64 items_size;
+	__u8 id128[16];
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_info - connection information
+ * @size:		total size of the struct
+ * @id:			64bit object ID
+ * @flags:		object creation flags
+ * @items:		list of items
+ *
+ * Note that the user is responsible for freeing the allocated memory with
+ * the KDBUS_CMD_FREE ioctl.
+ */
+struct kdbus_info {
+	__u64 size;
+	__u64 id;
+	__u64 flags;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_list_flags - what to include into the returned list
+ * @KDBUS_LIST_UNIQUE:		active connections
+ * @KDBUS_LIST_ACTIVATORS:	activator connections
+ * @KDBUS_LIST_NAMES:		known well-known names
+ * @KDBUS_LIST_QUEUED:		queued-up names
+ */
+enum kdbus_list_flags {
+	KDBUS_LIST_UNIQUE		= 1ULL <<  0,
+	KDBUS_LIST_NAMES		= 1ULL <<  1,
+	KDBUS_LIST_ACTIVATORS		= 1ULL <<  2,
+	KDBUS_LIST_QUEUED		= 1ULL <<  3,
+};
+
+/**
+ * struct kdbus_cmd_list - list connections
+ * @size:		overall size of this object
+ * @flags:		flags for the query (KDBUS_LIST_*), userspace → kernel
+ * @return_flags:	command return flags, kernel → userspace
+ * @offset:		Offset in the caller's pool buffer where an array of
+ *			kdbus_info objects is stored.
+ *			The user must use KDBUS_CMD_FREE to free the
+ *			allocated memory.
+ * @list_size:		size of returned list in bytes
+ * @items:		Items for the command. Reserved for future use.
+ *
+ * This structure is used with the KDBUS_CMD_LIST ioctl.
+ */
+struct kdbus_cmd_list {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 offset;
+	__u64 list_size;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * struct kdbus_cmd_info - struct used for KDBUS_CMD_CONN_INFO ioctl
+ * @size:		The total size of the struct
+ * @flags:		Flags for this ioctl, userspace → kernel
+ * @return_flags:	Command return flags, kernel → userspace
+ * @id:			The 64-bit ID of the connection. If set to zero, passing
+ *			@name is required. kdbus will look up the name to
+ *			determine the ID in this case.
+ * @attach_flags:	Set of attach flags to specify the set of information
+ *			to receive, userspace → kernel
+ * @offset:		Returned offset in the caller's pool buffer where the
+ *			kdbus_info struct result is stored. The user must
+ *			use KDBUS_CMD_FREE to free the allocated memory.
+ * @info_size:		Output buffer to report size of data at @offset.
+ * @items:		The optional item list, containing the
+ *			well-known name to look up as a KDBUS_ITEM_NAME.
+ *			Only needed in case @id is zero.
+ *
+ * On success, the KDBUS_CMD_CONN_INFO ioctl will return 0 and @offset will
+ * tell the user the offset in the connection pool buffer at which to find the
+ * result in a struct kdbus_info.
+ */
+struct kdbus_cmd_info {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 id;
+	__u64 attach_flags;
+	__u64 offset;
+	__u64 info_size;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_cmd_match_flags - flags to control the KDBUS_CMD_MATCH_ADD ioctl
+ * @KDBUS_MATCH_REPLACE:	If entries with the supplied cookie already
+ *				exist, remove them before installing the new
+ *				matches.
+ */
+enum kdbus_cmd_match_flags {
+	KDBUS_MATCH_REPLACE	= 1ULL <<  0,
+};
+
+/**
+ * struct kdbus_cmd_match - struct to add or remove matches
+ * @size:		The total size of the struct
+ * @flags:		Flags for match command (KDBUS_MATCH_*),
+ *			userspace → kernel
+ * @return_flags:	Command return flags, kernel → userspace
+ * @cookie:		Userspace supplied cookie. When removing, the cookie
+ *			identifies the match to remove
+ * @items:		A list of items for additional information
+ *
+ * This structure is used with the KDBUS_CMD_MATCH_ADD and
+ * KDBUS_CMD_MATCH_REMOVE ioctl.
+ */
+struct kdbus_cmd_match {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	__u64 cookie;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * enum kdbus_make_flags - Flags for KDBUS_CMD_{BUS,ENDPOINT}_MAKE
+ * @KDBUS_MAKE_ACCESS_GROUP:	Make the bus or endpoint node group-accessible
+ * @KDBUS_MAKE_ACCESS_WORLD:	Make the bus or endpoint node world-accessible
+ */
+enum kdbus_make_flags {
+	KDBUS_MAKE_ACCESS_GROUP		= 1ULL <<  0,
+	KDBUS_MAKE_ACCESS_WORLD		= 1ULL <<  1,
+};
+
+/**
+ * enum kdbus_name_flags - flags for KDBUS_CMD_NAME_ACQUIRE
+ * @KDBUS_NAME_REPLACE_EXISTING:	Try to replace name of other connections
+ * @KDBUS_NAME_ALLOW_REPLACEMENT:	Allow the replacement of the name
+ * @KDBUS_NAME_QUEUE:			Name should be queued if busy
+ * @KDBUS_NAME_IN_QUEUE:		Name is queued
+ * @KDBUS_NAME_ACTIVATOR:		Name is owned by an activator connection
+ * @KDBUS_NAME_PRIMARY:			Primary owner of the name
+ * @KDBUS_NAME_ACQUIRED:		Name was acquired/queued _now_
+ */
+enum kdbus_name_flags {
+	KDBUS_NAME_REPLACE_EXISTING	= 1ULL <<  0,
+	KDBUS_NAME_ALLOW_REPLACEMENT	= 1ULL <<  1,
+	KDBUS_NAME_QUEUE		= 1ULL <<  2,
+	KDBUS_NAME_IN_QUEUE		= 1ULL <<  3,
+	KDBUS_NAME_ACTIVATOR		= 1ULL <<  4,
+	KDBUS_NAME_PRIMARY		= 1ULL <<  5,
+	KDBUS_NAME_ACQUIRED		= 1ULL <<  6,
+};
+
+/**
+ * struct kdbus_cmd - generic ioctl payload
+ * @size:		Overall size of this structure
+ * @flags:		Flags for this ioctl, userspace → kernel
+ * @return_flags:	Ioctl return flags, kernel → userspace
+ * @items:		Additional items to modify the behavior
+ *
+ * This is a generic ioctl payload object. It's used by all ioctls that only
+ * take flags and items as input.
+ */
+struct kdbus_cmd {
+	__u64 size;
+	__u64 flags;
+	__u64 return_flags;
+	struct kdbus_item items[0];
+} __attribute__((__aligned__(8)));
+
+/**
+ * Ioctl API
+ *
+ * KDBUS_CMD_BUS_MAKE:		After opening the "control" node, this command
+ *				creates a new bus with the specified
+ *				name. The bus is immediately shut down and
+ *				cleaned up when the opened file descriptor is
+ *				closed.
+ *
+ * KDBUS_CMD_ENDPOINT_MAKE:	Creates a new named special endpoint to talk to
+ *				the bus. Such endpoints usually carry a more
+ *				restrictive policy and grant restricted access
+ *				to specific applications.
+ * KDBUS_CMD_ENDPOINT_UPDATE:	Update the properties of a custom endpoint. Used
+ *				to update the policy.
+ *
+ * KDBUS_CMD_HELLO:		By opening the bus node, a connection is
+ *				created. After a HELLO the opened connection
+ *				becomes an active peer on the bus.
+ * KDBUS_CMD_UPDATE:		Update the properties of a connection. Used to
+ *				update the metadata subscription mask and
+ *				policy.
+ * KDBUS_CMD_BYEBYE:		Disconnect a connection. If there are no
+ *				messages queued up in the connection's pool,
+ *				the call succeeds, and the handle is rendered
+ *				unusable. Otherwise, -EBUSY is returned without
+ *				any further side-effects.
+ * KDBUS_CMD_FREE:		Release the allocated memory in the receiver's
+ *				pool.
+ * KDBUS_CMD_CONN_INFO:		Retrieve credentials and properties of the
+ *				initial creator of the connection. The data was
+ *				stored at registration time and does not
+ *				necessarily represent the connected process or
+ *				the actual state of the process.
+ * KDBUS_CMD_BUS_CREATOR_INFO:	Retrieve information of the creator of the bus
+ *				a connection is attached to.
+ *
+ * KDBUS_CMD_SEND:		Send a message and pass data from userspace to
+ *				the kernel.
+ * KDBUS_CMD_RECV:		Receive a message from the kernel which is
+ *				placed in the receiver's pool.
+ *
+ * KDBUS_CMD_NAME_ACQUIRE:	Request a well-known bus name to associate with
+ *				the connection. Well-known names are used to
+ *				address a peer on the bus.
+ * KDBUS_CMD_NAME_RELEASE:	Release a well-known name the connection
+ *				currently owns.
+ * KDBUS_CMD_LIST:		Retrieve the list of all currently registered
+ *				well-known and unique names.
+ *
+ * KDBUS_CMD_MATCH_ADD:		Install a match which broadcast messages should
+ *				be delivered to the connection.
+ * KDBUS_CMD_MATCH_REMOVE:	Remove a current match for broadcast messages.
+ */
+enum kdbus_ioctl_type {
+	/* bus owner (00-0f) */
+	KDBUS_CMD_BUS_MAKE =		_IOW(KDBUS_IOCTL_MAGIC, 0x00,
+					     struct kdbus_cmd),
+
+	/* endpoint owner (10-1f) */
+	KDBUS_CMD_ENDPOINT_MAKE =	_IOW(KDBUS_IOCTL_MAGIC, 0x10,
+					     struct kdbus_cmd),
+	KDBUS_CMD_ENDPOINT_UPDATE =	_IOW(KDBUS_IOCTL_MAGIC, 0x11,
+					     struct kdbus_cmd),
+
+	/* connection owner (80-ff) */
+	KDBUS_CMD_HELLO =		_IOWR(KDBUS_IOCTL_MAGIC, 0x80,
+					      struct kdbus_cmd_hello),
+	KDBUS_CMD_UPDATE =		_IOW(KDBUS_IOCTL_MAGIC, 0x81,
+					     struct kdbus_cmd),
+	KDBUS_CMD_BYEBYE =		_IOW(KDBUS_IOCTL_MAGIC, 0x82,
+					     struct kdbus_cmd),
+	KDBUS_CMD_FREE =		_IOW(KDBUS_IOCTL_MAGIC, 0x83,
+					     struct kdbus_cmd_free),
+	KDBUS_CMD_CONN_INFO =		_IOR(KDBUS_IOCTL_MAGIC, 0x84,
+					     struct kdbus_cmd_info),
+	KDBUS_CMD_BUS_CREATOR_INFO =	_IOR(KDBUS_IOCTL_MAGIC, 0x85,
+					     struct kdbus_cmd_info),
+	KDBUS_CMD_LIST =		_IOR(KDBUS_IOCTL_MAGIC, 0x86,
+					     struct kdbus_cmd_list),
+
+	KDBUS_CMD_SEND =		_IOW(KDBUS_IOCTL_MAGIC, 0x90,
+					     struct kdbus_cmd_send),
+	KDBUS_CMD_RECV =		_IOR(KDBUS_IOCTL_MAGIC, 0x91,
+					     struct kdbus_cmd_recv),
+
+	KDBUS_CMD_NAME_ACQUIRE =	_IOW(KDBUS_IOCTL_MAGIC, 0xa0,
+					     struct kdbus_cmd),
+	KDBUS_CMD_NAME_RELEASE =	_IOW(KDBUS_IOCTL_MAGIC, 0xa1,
+					     struct kdbus_cmd),
+
+	KDBUS_CMD_MATCH_ADD =		_IOW(KDBUS_IOCTL_MAGIC, 0xb0,
+					     struct kdbus_cmd_match),
+	KDBUS_CMD_MATCH_REMOVE =	_IOW(KDBUS_IOCTL_MAGIC, 0xb1,
+					     struct kdbus_cmd_match),
+};
+
+#endif /* _UAPI_KDBUS_H_ */
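
To tie the item and flag definitions above together, here is a minimal userspace sketch of KDBUS_CMD_BUS_MAKE. It is illustrative only and not part of the patch: the control-node path and the "<uid>-" name prefix are assumptions (the prefix check is suggested by kdbus_verify_uid_prefix() in ipc/kdbus/bus.c further down), and the bloom parameters merely satisfy the size >= 8, multiple-of-8, n_hash >= 1 checks in kdbus_bus_new().

	/* Illustrative userspace sketch only -- not part of the patch. */
	#include <fcntl.h>
	#include <stddef.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kdbus.h>

	#define EX_ALIGN8(x) (((x) + 7) & ~7UL)

	int main(void)
	{
		static char buf[256] __attribute__((__aligned__(8)));
		struct kdbus_cmd *cmd = (struct kdbus_cmd *)buf;
		struct kdbus_item *item = cmd->items;
		char name[64];
		int fd;

		/* unprivileged callers are expected to prefix the name with their uid */
		snprintf(name, sizeof(name), "%u-example", (unsigned)getuid());

		/* item 1: bloom parameters -- size >= 8, multiple of 8, n_hash >= 1 */
		item->type = KDBUS_ITEM_BLOOM_PARAMETER;
		item->size = offsetof(struct kdbus_item, bloom_parameter) +
			     sizeof(struct kdbus_bloom_parameter);
		item->bloom_parameter.size = 64;
		item->bloom_parameter.n_hash = 1;
		item = (struct kdbus_item *)((char *)item + EX_ALIGN8(item->size));

		/* item 2: the bus name */
		item->type = KDBUS_ITEM_MAKE_NAME;
		item->size = offsetof(struct kdbus_item, str) + strlen(name) + 1;
		strcpy(item->str, name);
		item = (struct kdbus_item *)((char *)item + EX_ALIGN8(item->size));

		cmd->size = (char *)item - buf;
		cmd->flags = KDBUS_MAKE_ACCESS_WORLD;

		fd = open("/sys/fs/kdbus/control", O_RDWR);	/* mount point assumed */
		if (fd < 0 || ioctl(fd, KDBUS_CMD_BUS_MAKE, cmd) < 0) {
			perror("KDBUS_CMD_BUS_MAKE");
			return 1;
		}
		/* the bus exists until this file descriptor is closed */
		return 0;
	}
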
diff -Nur linux-4.2/include/uapi/linux/magic.h linux-4.2-pck/include/uapi/linux/magic.h
--- linux-4.2/include/uapi/linux/magic.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/magic.h	2015-09-08 00:48:22.231697056 -0300
@@ -76,4 +76,6 @@
 #define BTRFS_TEST_MAGIC	0x73727279
 #define NSFS_MAGIC		0x6e736673
 
+#define KDBUS_SUPER_MAGIC	0x44427573
+
 #endif /* __LINUX_MAGIC_H__ */
diff -Nur linux-4.2/include/uapi/linux/netlink.h linux-4.2-pck/include/uapi/linux/netlink.h
--- linux-4.2/include/uapi/linux/netlink.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/netlink.h	2015-09-08 11:20:32.353684177 -0300
@@ -27,6 +27,8 @@
 #define NETLINK_ECRYPTFS	19
 #define NETLINK_RDMA		20
 #define NETLINK_CRYPTO		21	/* Crypto layer */
+#define NETLINK_TOI_USERUI	22	/* TuxOnIce's userui */
+#define NETLINK_TOI_USM		23	/* Userspace storage manager */
 
 #define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG
 
diff -Nur linux-4.2/include/uapi/linux/tcp.h linux-4.2-pck/include/uapi/linux/tcp.h
--- linux-4.2/include/uapi/linux/tcp.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/tcp.h	2015-09-08 00:48:31.501257347 -0300
@@ -115,6 +115,9 @@
 #define TCP_CC_INFO		26	/* Get Congestion Control (optional) info */
 #define TCP_SAVE_SYN		27	/* Record SYN headers for new connections */
 #define TCP_SAVED_SYN		28	/* Get SYN headers recorded for connection */
+#define TCP_STEALTH		29
+#define TCP_STEALTH_INTEGRITY	30
+#define TCP_STEALTH_INTEGRITY_LEN	31
 
 struct tcp_repair_opt {
 	__u32	opt_code;
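
Since the three socket options above are only numeric definitions, a short hedged userspace sketch may help. It is not part of the patch: the client-side usage (set the shared secret before connect()) and the 64-byte secret size are assumptions based on the TCP_STEALTH option name and the MD5_MESSAGE_BYTES-sized field added to struct tcp_sock earlier; the ISN derivation is handled by tcp_stealth_sequence_number(), declared in the secure_seq.h hunk above.

	/* Illustrative userspace sketch only -- not part of the patch. */
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	#ifndef TCP_STEALTH
	#define TCP_STEALTH 29			/* value added in the hunk above */
	#endif

	static int example_stealth_client_socket(void)
	{
		static const char secret[64] = "shared-secret";	/* zero-padded to 64 bytes */
		int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

		if (fd < 0)
			return -1;

		/* Arm the socket before connect(); the kernel then derives the SYN's
		 * initial sequence number from the shared secret. */
		if (setsockopt(fd, IPPROTO_TCP, TCP_STEALTH, secret, sizeof(secret)) < 0)
			return -1;

		return fd;
	}
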
diff -Nur linux-4.2/include/uapi/linux/vt.h linux-4.2-pck/include/uapi/linux/vt.h
--- linux-4.2/include/uapi/linux/vt.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/include/uapi/linux/vt.h	2015-09-08 00:48:22.231697056 -0300
@@ -3,12 +3,26 @@
 
 
 /*
+ * We will make this definition solely for the purpose of making packages
+ * such as splashutils build, because they cannot understand that
+ * NR_TTY_DEVICES is defined in the kernel configuration.
+ */
+#ifndef CONFIG_NR_TTY_DEVICES
+#define CONFIG_NR_TTY_DEVICES 63
+#endif
+
+/*
  * These constants are also useful for user-level apps (e.g., VC
  * resizing).
  */
 #define MIN_NR_CONSOLES 1       /* must be at least 1 */
-#define MAX_NR_CONSOLES	63	/* serial lines start at 64 */
-#define MAX_NR_USER_CONSOLES 63	/* must be root to allocate above this */
+
+/*
+ * NR_TTY_DEVICES:
+ * Value MUST be at least 11 and must never be higher than 63
+ */
+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES		/* serial lines start above this */
+#define MAX_NR_USER_CONSOLES CONFIG_NR_TTY_DEVICES	/* must be root to allocate above this */
 		/* Note: the ioctl VT_GETSTATE does not work for
 		   consoles 16 and higher (since it returns a short) */
 
diff -Nur linux-4.2/init/Kconfig linux-4.2-pck/init/Kconfig
--- linux-4.2/init/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/init/Kconfig	2015-09-08 00:48:22.231697056 -0300
@@ -28,6 +28,26 @@
 
 menu "General setup"
 
+config PCK_INTERACTIVE
+	bool "Tune kernel for interactivity"
+	default y
+	help
+	  Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+	  --- VM ---
+	    Mem dirty before bg writeback..:  10 %  -> 128 MiB
+	    Mem dirty before sync writeback:  20 %  -> 256 MiB
+
+	  --- CPU Scheduler ---
+	    Scheduling latency.............:   6    ->   3    ms
+	    Minimal granularity............:   0.75 ->   0.3  ms
+	    Wakeup granularity.............:   1    ->   0.5  ms
+	    CPU migration cost.............:   0.5  ->   0.25 ms
+	    Bandwidth slice size...........:   5    ->   3    ms
+
+	  --- CPU Frequency Scaling ---
+	    Ondemand down scaling factor...:   1    ->  10
+
 config BROKEN
 	bool
 
@@ -261,6 +281,19 @@
 	depends on SYSCTL
 	default y
 
+config KDBUS
+	tristate "kdbus interprocess communication"
+	depends on TMPFS
+	help
+	  D-Bus is a system for low-latency, low-overhead, easy to use
+	  interprocess communication (IPC).
+
+	  See the man-pages and HTML files in Documentation/kdbus/
+	  that are generated by 'make mandocs' and 'make htmldocs'.
+
+	  If you have an ordinary machine, select M here. The module
+	  will be called kdbus.
+
 config CROSS_MEMORY_ATTACH
 	bool "Enable process_vm_readv/writev syscalls"
 	depends on MMU
@@ -1137,7 +1170,7 @@
 endif # CGROUPS
 
 config CHECKPOINT_RESTORE
-	bool "Checkpoint/restore support" if EXPERT
+	bool "Checkpoint/restore support"
 	select PROC_CHILDREN
 	default n
 	help
diff -Nur linux-4.2/init/do_mounts.c linux-4.2-pck/init/do_mounts.c
--- linux-4.2/init/do_mounts.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/init/do_mounts.c	2015-09-08 11:20:32.383682710 -0300
@@ -597,6 +597,8 @@
 	if (is_floppy && rd_doload && rd_load_disk(0))
 		ROOT_DEV = Root_RAM0;
 
+	check_resume_attempted();
+
 	mount_root();
 out:
 	devtmpfs_mount("dev");
diff -Nur linux-4.2/init/do_mounts_initrd.c linux-4.2-pck/init/do_mounts_initrd.c
--- linux-4.2/init/do_mounts_initrd.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/init/do_mounts_initrd.c	2015-09-08 11:20:32.393682222 -0300
@@ -15,6 +15,7 @@
 #include <linux/romfs_fs.h>
 #include <linux/initrd.h>
 #include <linux/sched.h>
+#include <linux/suspend.h>
 #include <linux/freezer.h>
 #include <linux/kmod.h>
 
@@ -79,6 +80,11 @@
 
 	current->flags &= ~PF_FREEZER_SKIP;
 
+	if (!resume_attempted)
+		printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
+				"any image that might exist.\n");
+	clear_toi_state(TOI_BOOT_TIME);
+
 	/* move initrd to rootfs' /old */
 	sys_mount("..", ".", NULL, MS_MOVE, NULL);
 	/* switch root and cwd back to / of rootfs */
diff -Nur linux-4.2/ipc/Makefile linux-4.2-pck/ipc/Makefile
--- linux-4.2/ipc/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/ipc/Makefile	2015-09-08 00:48:22.231697056 -0300
@@ -9,4 +9,4 @@
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
 obj-$(CONFIG_IPC_NS) += namespace.o
 obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
-
+obj-$(CONFIG_KDBUS) += kdbus/
diff -Nur linux-4.2/ipc/kdbus/Makefile linux-4.2-pck/ipc/kdbus/Makefile
--- linux-4.2/ipc/kdbus/Makefile	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/Makefile	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,33 @@
+#
+# By setting KDBUS_EXT=2, the kdbus module will be built as kdbus2.ko, and
+# KBUILD_MODNAME=kdbus2. This has the effect that all exported objects have
+# different names than usually (kdbus2fs, /sys/fs/kdbus2/) and you can run
+# your test-infrastructure against the kdbus2.ko, while running your system
+# on kdbus.ko.
+#
+# To just build the module, use:
+#     make KDBUS_EXT=2 M=ipc/kdbus
+#
+
+kdbus$(KDBUS_EXT)-y := \
+	bus.o \
+	connection.o \
+	endpoint.o \
+	fs.o \
+	handle.o \
+	item.o \
+	main.o \
+	match.o \
+	message.o \
+	metadata.o \
+	names.o \
+	node.o \
+	notify.o \
+	domain.o \
+	policy.o \
+	pool.o \
+	reply.o \
+	queue.o \
+	util.o
+
+obj-$(CONFIG_KDBUS) += kdbus$(KDBUS_EXT).o
diff -Nur linux-4.2/ipc/kdbus/bus.c linux-4.2-pck/ipc/kdbus/bus.c
--- linux-4.2/ipc/kdbus/bus.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/bus.c	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,514 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+
+#include "bus.h"
+#include "notify.h"
+#include "connection.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "item.h"
+#include "match.h"
+#include "message.h"
+#include "metadata.h"
+#include "names.h"
+#include "policy.h"
+#include "util.h"
+
+static void kdbus_bus_free(struct kdbus_node *node)
+{
+	struct kdbus_bus *bus = container_of(node, struct kdbus_bus, node);
+
+	WARN_ON(!list_empty(&bus->monitors_list));
+	WARN_ON(!hash_empty(bus->conn_hash));
+
+	kdbus_notify_free(bus);
+
+	kdbus_user_unref(bus->creator);
+	kdbus_name_registry_free(bus->name_registry);
+	kdbus_domain_unref(bus->domain);
+	kdbus_policy_db_clear(&bus->policy_db);
+	kdbus_meta_proc_unref(bus->creator_meta);
+	kfree(bus);
+}
+
+static void kdbus_bus_release(struct kdbus_node *node, bool was_active)
+{
+	struct kdbus_bus *bus = container_of(node, struct kdbus_bus, node);
+
+	if (was_active)
+		atomic_dec(&bus->creator->buses);
+}
+
+static struct kdbus_bus *kdbus_bus_new(struct kdbus_domain *domain,
+				       const char *name,
+				       struct kdbus_bloom_parameter *bloom,
+				       const u64 *pattach_owner,
+				       u64 flags, kuid_t uid, kgid_t gid)
+{
+	struct kdbus_bus *b;
+	u64 attach_owner;
+	int ret;
+
+	if (bloom->size < 8 || bloom->size > KDBUS_BUS_BLOOM_MAX_SIZE ||
+	    !KDBUS_IS_ALIGNED8(bloom->size) || bloom->n_hash < 1)
+		return ERR_PTR(-EINVAL);
+
+	ret = kdbus_sanitize_attach_flags(pattach_owner ? *pattach_owner : 0,
+					  &attach_owner);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	ret = kdbus_verify_uid_prefix(name, domain->user_namespace, uid);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	b = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return ERR_PTR(-ENOMEM);
+
+	kdbus_node_init(&b->node, KDBUS_NODE_BUS);
+
+	b->node.free_cb = kdbus_bus_free;
+	b->node.release_cb = kdbus_bus_release;
+	b->node.uid = uid;
+	b->node.gid = gid;
+	b->node.mode = S_IRUSR | S_IXUSR;
+
+	if (flags & (KDBUS_MAKE_ACCESS_GROUP | KDBUS_MAKE_ACCESS_WORLD))
+		b->node.mode |= S_IRGRP | S_IXGRP;
+	if (flags & KDBUS_MAKE_ACCESS_WORLD)
+		b->node.mode |= S_IROTH | S_IXOTH;
+
+	b->id = atomic64_inc_return(&domain->last_id);
+	b->bus_flags = flags;
+	b->attach_flags_owner = attach_owner;
+	generate_random_uuid(b->id128);
+	b->bloom = *bloom;
+	b->domain = kdbus_domain_ref(domain);
+
+	kdbus_policy_db_init(&b->policy_db);
+
+	init_rwsem(&b->conn_rwlock);
+	hash_init(b->conn_hash);
+	INIT_LIST_HEAD(&b->monitors_list);
+
+	INIT_LIST_HEAD(&b->notify_list);
+	spin_lock_init(&b->notify_lock);
+	mutex_init(&b->notify_flush_lock);
+
+	ret = kdbus_node_link(&b->node, &domain->node, name);
+	if (ret < 0)
+		goto exit_unref;
+
+	/* cache the metadata/credentials of the creator */
+	b->creator_meta = kdbus_meta_proc_new();
+	if (IS_ERR(b->creator_meta)) {
+		ret = PTR_ERR(b->creator_meta);
+		b->creator_meta = NULL;
+		goto exit_unref;
+	}
+
+	ret = kdbus_meta_proc_collect(b->creator_meta,
+				      KDBUS_ATTACH_CREDS |
+				      KDBUS_ATTACH_PIDS |
+				      KDBUS_ATTACH_AUXGROUPS |
+				      KDBUS_ATTACH_TID_COMM |
+				      KDBUS_ATTACH_PID_COMM |
+				      KDBUS_ATTACH_EXE |
+				      KDBUS_ATTACH_CMDLINE |
+				      KDBUS_ATTACH_CGROUP |
+				      KDBUS_ATTACH_CAPS |
+				      KDBUS_ATTACH_SECLABEL |
+				      KDBUS_ATTACH_AUDIT);
+	if (ret < 0)
+		goto exit_unref;
+
+	b->name_registry = kdbus_name_registry_new();
+	if (IS_ERR(b->name_registry)) {
+		ret = PTR_ERR(b->name_registry);
+		b->name_registry = NULL;
+		goto exit_unref;
+	}
+
+	/*
+	 * Bus-limits of the creator are accounted on its real UID, just like
+	 * all other per-user limits.
+	 */
+	b->creator = kdbus_user_lookup(domain, current_uid());
+	if (IS_ERR(b->creator)) {
+		ret = PTR_ERR(b->creator);
+		b->creator = NULL;
+		goto exit_unref;
+	}
+
+	return b;
+
+exit_unref:
+	kdbus_node_drain(&b->node);
+	kdbus_node_unref(&b->node);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_bus_ref() - increase the reference counter of a kdbus_bus
+ * @bus:		The bus to reference
+ *
+ * Every user of a bus, except for its creator, must add a reference to the
+ * kdbus_bus using this function.
+ *
+ * Return: the bus itself
+ */
+struct kdbus_bus *kdbus_bus_ref(struct kdbus_bus *bus)
+{
+	if (bus)
+		kdbus_node_ref(&bus->node);
+	return bus;
+}
+
+/**
+ * kdbus_bus_unref() - decrease the reference counter of a kdbus_bus
+ * @bus:		The bus to unref
+ *
+ * Release a reference. If the reference count drops to 0, the bus will be
+ * freed.
+ *
+ * Return: NULL
+ */
+struct kdbus_bus *kdbus_bus_unref(struct kdbus_bus *bus)
+{
+	if (bus)
+		kdbus_node_unref(&bus->node);
+	return NULL;
+}
+
+/**
+ * kdbus_bus_find_conn_by_id() - find a connection with a given id
+ * @bus:		The bus to look for the connection
+ * @id:			The 64-bit connection id
+ *
+ * Looks up a connection with a given id. The returned connection
+ * is ref'ed, and needs to be unref'ed by the user. Returns NULL if
+ * the connection can't be found.
+ */
+struct kdbus_conn *kdbus_bus_find_conn_by_id(struct kdbus_bus *bus, u64 id)
+{
+	struct kdbus_conn *conn, *found = NULL;
+
+	down_read(&bus->conn_rwlock);
+	hash_for_each_possible(bus->conn_hash, conn, hentry, id)
+		if (conn->id == id) {
+			found = kdbus_conn_ref(conn);
+			break;
+		}
+	up_read(&bus->conn_rwlock);
+
+	return found;
+}
+
+/**
+ * kdbus_bus_broadcast() - send a message to all subscribed connections
+ * @bus:	The bus the connections are connected to
+ * @conn_src:	The source connection, may be %NULL for kernel notifications
+ * @staging:	Staging object containing the message to send
+ *
+ * Send message to all connections that are currently active on the bus.
+ * Connections must still have matches installed in order to let the message
+ * pass.
+ *
+ * The caller must hold the name-registry lock of @bus.
+ */
+void kdbus_bus_broadcast(struct kdbus_bus *bus,
+			 struct kdbus_conn *conn_src,
+			 struct kdbus_staging *staging)
+{
+	struct kdbus_conn *conn_dst;
+	unsigned int i;
+	int ret;
+
+	lockdep_assert_held(&bus->name_registry->rwlock);
+
+	/*
+	 * Make sure broadcasts are queued on monitors before we send them out to
+	 * anyone else. Otherwise, connections might react to broadcasts before
+	 * the monitor gets the broadcast queued. In the worst case, the
+	 * monitor sees a reaction to the broadcast before the broadcast itself.
+	 * We don't give ordering guarantees across connections (and monitors
+	 * can re-construct order via sequence numbers), but we should at least
+	 * try to avoid re-ordering for monitors.
+	 */
+	kdbus_bus_eavesdrop(bus, conn_src, staging);
+
+	down_read(&bus->conn_rwlock);
+	hash_for_each(bus->conn_hash, i, conn_dst, hentry) {
+		if (!kdbus_conn_is_ordinary(conn_dst))
+			continue;
+
+		/*
+		 * Check if there is a match for the kmsg object in
+		 * the destination connection match db
+		 */
+		if (!kdbus_match_db_match_msg(conn_dst->match_db, conn_src,
+					      staging))
+			continue;
+
+		if (conn_src) {
+			/*
+			 * Anyone can send broadcasts, as they have no
+			 * destination. But a receiver needs TALK access to
+			 * the sender in order to receive broadcasts.
+			 */
+			if (!kdbus_conn_policy_talk(conn_dst, NULL, conn_src))
+				continue;
+		} else {
+			/*
+			 * Check if there is a policy db that prevents the
+			 * destination connection from receiving this kernel
+			 * notification
+			 */
+			if (!kdbus_conn_policy_see_notification(conn_dst, NULL,
+								staging->msg))
+				continue;
+		}
+
+		ret = kdbus_conn_entry_insert(conn_src, conn_dst, staging,
+					      NULL, NULL);
+		if (ret < 0)
+			kdbus_conn_lost_message(conn_dst);
+	}
+	up_read(&bus->conn_rwlock);
+}
+
+/**
+ * kdbus_bus_eavesdrop() - send a message to all subscribed monitors
+ * @bus:	The bus the monitors are connected to
+ * @conn_src:	The source connection, may be %NULL for kernel notifications
+ * @staging:	Staging object containing the message to send
+ *
+ * Send the message to all monitors that are currently active on the bus.
+ * Monitors
+ * must still have matches installed in order to let the message pass.
+ *
+ * The caller must hold the name-registry lock of @bus.
+ */
+void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
+			 struct kdbus_conn *conn_src,
+			 struct kdbus_staging *staging)
+{
+	struct kdbus_conn *conn_dst;
+	int ret;
+
+	/*
+	 * Monitor connections get all messages; ignore possible errors
+	 * when sending messages to monitor connections.
+	 */
+
+	lockdep_assert_held(&bus->name_registry->rwlock);
+
+	down_read(&bus->conn_rwlock);
+	list_for_each_entry(conn_dst, &bus->monitors_list, monitor_entry) {
+		ret = kdbus_conn_entry_insert(conn_src, conn_dst, staging,
+					      NULL, NULL);
+		if (ret < 0)
+			kdbus_conn_lost_message(conn_dst);
+	}
+	up_read(&bus->conn_rwlock);
+}
+
+/**
+ * kdbus_cmd_bus_make() - handle KDBUS_CMD_BUS_MAKE
+ * @domain:		domain to operate on
+ * @argp:		command payload
+ *
+ * Return: NULL or newly created bus on success, ERR_PTR on failure.
+ */
+struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
+				     void __user *argp)
+{
+	struct kdbus_bus *bus = NULL;
+	struct kdbus_cmd *cmd;
+	struct kdbus_ep *ep = NULL;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_MAKE_NAME, .mandatory = true },
+		{ .type = KDBUS_ITEM_BLOOM_PARAMETER, .mandatory = true },
+		{ .type = KDBUS_ITEM_ATTACH_FLAGS_SEND },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_MAKE_ACCESS_GROUP |
+				 KDBUS_MAKE_ACCESS_WORLD,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0)
+		return NULL;
+
+	bus = kdbus_bus_new(domain,
+			    argv[1].item->str, &argv[2].item->bloom_parameter,
+			    argv[3].item ? argv[3].item->data64 : NULL,
+			    cmd->flags, current_euid(), current_egid());
+	if (IS_ERR(bus)) {
+		ret = PTR_ERR(bus);
+		bus = NULL;
+		goto exit;
+	}
+
+	if (atomic_inc_return(&bus->creator->buses) > KDBUS_USER_MAX_BUSES) {
+		atomic_dec(&bus->creator->buses);
+		ret = -EMFILE;
+		goto exit;
+	}
+
+	if (!kdbus_node_activate(&bus->node)) {
+		atomic_dec(&bus->creator->buses);
+		ret = -ESHUTDOWN;
+		goto exit;
+	}
+
+	ep = kdbus_ep_new(bus, "bus", cmd->flags, bus->node.uid, bus->node.gid,
+			  false);
+	if (IS_ERR(ep)) {
+		ret = PTR_ERR(ep);
+		ep = NULL;
+		goto exit;
+	}
+
+	if (!kdbus_node_activate(&ep->node)) {
+		ret = -ESHUTDOWN;
+		goto exit;
+	}
+
+	/*
+	 * Drop our own reference, effectively causing the endpoint to be
+	 * deactivated and released when the parent bus is.
+	 */
+	ep = kdbus_ep_unref(ep);
+
+exit:
+	ret = kdbus_args_clear(&args, ret);
+	if (ret < 0) {
+		if (ep) {
+			kdbus_node_drain(&ep->node);
+			kdbus_ep_unref(ep);
+		}
+		if (bus) {
+			kdbus_node_drain(&bus->node);
+			kdbus_bus_unref(bus);
+		}
+		return ERR_PTR(ret);
+	}
+	return bus;
+}
+
+/**
+ * kdbus_cmd_bus_creator_info() - handle KDBUS_CMD_BUS_CREATOR_INFO
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_cmd_info *cmd;
+	struct kdbus_bus *bus = conn->ep->bus;
+	struct kdbus_pool_slice *slice = NULL;
+	struct kdbus_item *meta_items = NULL;
+	struct kdbus_item_header item_hdr;
+	struct kdbus_info info = {};
+	size_t meta_size, name_len, cnt = 0;
+	struct kvec kvec[6];
+	u64 attach_flags, size = 0;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	ret = kdbus_sanitize_attach_flags(cmd->attach_flags, &attach_flags);
+	if (ret < 0)
+		goto exit;
+
+	attach_flags &= bus->attach_flags_owner;
+
+	ret = kdbus_meta_emit(bus->creator_meta, NULL, NULL, conn,
+			      attach_flags, &meta_items, &meta_size);
+	if (ret < 0)
+		goto exit;
+
+	name_len = strlen(bus->node.name) + 1;
+	info.id = bus->id;
+	info.flags = bus->bus_flags;
+	item_hdr.type = KDBUS_ITEM_MAKE_NAME;
+	item_hdr.size = KDBUS_ITEM_HEADER_SIZE + name_len;
+
+	kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &size);
+	kdbus_kvec_set(&kvec[cnt++], &item_hdr, sizeof(item_hdr), &size);
+	kdbus_kvec_set(&kvec[cnt++], bus->node.name, name_len, &size);
+	cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+	if (meta_size > 0) {
+		kdbus_kvec_set(&kvec[cnt++], meta_items, meta_size, &size);
+		cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+	}
+
+	info.size = size;
+
+	slice = kdbus_pool_slice_alloc(conn->pool, size, false);
+	if (IS_ERR(slice)) {
+		ret = PTR_ERR(slice);
+		slice = NULL;
+		goto exit;
+	}
+
+	ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, size);
+	if (ret < 0)
+		goto exit;
+
+	kdbus_pool_slice_publish(slice, &cmd->offset, &cmd->info_size);
+
+	if (kdbus_member_set_user(&cmd->offset, argp, typeof(*cmd), offset) ||
+	    kdbus_member_set_user(&cmd->info_size, argp,
+				  typeof(*cmd), info_size))
+		ret = -EFAULT;
+
+exit:
+	kdbus_pool_slice_release(slice);
+	kfree(meta_items);
+	return kdbus_args_clear(&args, ret);
+}
diff -Nur linux-4.2/ipc/kdbus/bus.h linux-4.2-pck/ipc/kdbus/bus.h
--- linux-4.2/ipc/kdbus/bus.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/bus.h	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_BUS_H
+#define __KDBUS_BUS_H
+
+#include <linux/hashtable.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/kdbus.h>
+
+#include "metadata.h"
+#include "names.h"
+#include "node.h"
+#include "policy.h"
+
+struct kdbus_conn;
+struct kdbus_domain;
+struct kdbus_staging;
+struct kdbus_user;
+
+/**
+ * struct kdbus_bus - bus in a domain
+ * @node:		kdbus_node
+ * @id:			ID of this bus in the domain
+ * @bus_flags:		Simple pass-through flags from userspace to userspace
+ * @attach_flags_owner:	KDBUS_ATTACH_* flags of bus creator that other
+ *			connections can see or query
+ * @id128:		Unique random 128 bit ID of this bus
+ * @bloom:		Bloom parameters
+ * @domain:		Domain of this bus
+ * @creator:		Creator of the bus
+ * @creator_meta:	Meta information about the bus creator
+ * @last_message_id:	Last used message id
+ * @policy_db:		Policy database for this bus
+ * @name_registry:	Name registry of this bus
+ * @conn_rwlock:	Read/Write lock for all lists of child connections
+ * @conn_hash:		Map of connection IDs
+ * @monitors_list:	Connections that monitor this bus
+ * @notify_list:	List of pending kernel-generated messages
+ * @notify_lock:	Notification list lock
+ * @notify_flush_lock:	Notification flushing lock
+ */
+struct kdbus_bus {
+	struct kdbus_node node;
+
+	/* static */
+	u64 id;
+	u64 bus_flags;
+	u64 attach_flags_owner;
+	u8 id128[16];
+	struct kdbus_bloom_parameter bloom;
+	struct kdbus_domain *domain;
+	struct kdbus_user *creator;
+	struct kdbus_meta_proc *creator_meta;
+
+	/* protected by own locks */
+	atomic64_t last_message_id;
+	struct kdbus_policy_db policy_db;
+	struct kdbus_name_registry *name_registry;
+
+	/* protected by conn_rwlock */
+	struct rw_semaphore conn_rwlock;
+	DECLARE_HASHTABLE(conn_hash, 8);
+	struct list_head monitors_list;
+
+	/* protected by notify_lock */
+	struct list_head notify_list;
+	spinlock_t notify_lock;
+	struct mutex notify_flush_lock;
+};
+
+struct kdbus_bus *kdbus_bus_ref(struct kdbus_bus *bus);
+struct kdbus_bus *kdbus_bus_unref(struct kdbus_bus *bus);
+
+struct kdbus_conn *kdbus_bus_find_conn_by_id(struct kdbus_bus *bus, u64 id);
+void kdbus_bus_broadcast(struct kdbus_bus *bus,
+			 struct kdbus_conn *conn_src,
+			 struct kdbus_staging *staging);
+void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
+			 struct kdbus_conn *conn_src,
+			 struct kdbus_staging *staging);
+
+struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
+				     void __user *argp);
+int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/connection.c linux-4.2-pck/ipc/kdbus/connection.c
--- linux-4.2/ipc/kdbus/connection.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/connection.c	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,2227 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/audit.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fs_struct.h>
+#include <linux/hashtable.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/path.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/shmem_fs.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uio.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "match.h"
+#include "message.h"
+#include "metadata.h"
+#include "names.h"
+#include "domain.h"
+#include "item.h"
+#include "notify.h"
+#include "policy.h"
+#include "pool.h"
+#include "reply.h"
+#include "util.h"
+#include "queue.h"
+
+#define KDBUS_CONN_ACTIVE_BIAS	(INT_MIN + 2)
+#define KDBUS_CONN_ACTIVE_NEW	(INT_MIN + 1)
+
+static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep,
+					 struct file *file,
+					 struct kdbus_cmd_hello *hello,
+					 const char *name,
+					 const struct kdbus_creds *creds,
+					 const struct kdbus_pids *pids,
+					 const char *seclabel,
+					 const char *conn_description)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	static struct lock_class_key __key;
+#endif
+	struct kdbus_pool_slice *slice = NULL;
+	struct kdbus_bus *bus = ep->bus;
+	struct kdbus_conn *conn;
+	u64 attach_flags_send;
+	u64 attach_flags_recv;
+	u64 items_size = 0;
+	bool is_policy_holder;
+	bool is_activator;
+	bool is_monitor;
+	bool privileged;
+	bool owner;
+	struct kvec kvec;
+	int ret;
+
+	struct {
+		u64 size;
+		u64 type;
+		struct kdbus_bloom_parameter bloom;
+	} bloom_item;
+
+	privileged = kdbus_ep_is_privileged(ep, file);
+	owner = kdbus_ep_is_owner(ep, file);
+
+	is_monitor = hello->flags & KDBUS_HELLO_MONITOR;
+	is_activator = hello->flags & KDBUS_HELLO_ACTIVATOR;
+	is_policy_holder = hello->flags & KDBUS_HELLO_POLICY_HOLDER;
+
+	if (!hello->pool_size || !IS_ALIGNED(hello->pool_size, PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+	if (is_monitor + is_activator + is_policy_holder > 1)
+		return ERR_PTR(-EINVAL);
+	if (name && !is_activator && !is_policy_holder)
+		return ERR_PTR(-EINVAL);
+	if (!name && (is_activator || is_policy_holder))
+		return ERR_PTR(-EINVAL);
+	if (name && !kdbus_name_is_valid(name, true))
+		return ERR_PTR(-EINVAL);
+	if (is_monitor && ep->user)
+		return ERR_PTR(-EOPNOTSUPP);
+	if (!owner && (is_activator || is_policy_holder || is_monitor))
+		return ERR_PTR(-EPERM);
+	if (!owner && (creds || pids || seclabel))
+		return ERR_PTR(-EPERM);
+
+	ret = kdbus_sanitize_attach_flags(hello->attach_flags_send,
+					  &attach_flags_send);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	ret = kdbus_sanitize_attach_flags(hello->attach_flags_recv,
+					  &attach_flags_recv);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+	if (!conn)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&conn->kref);
+	atomic_set(&conn->active, KDBUS_CONN_ACTIVE_NEW);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	lockdep_init_map(&conn->dep_map, "s_active", &__key, 0);
+#endif
+	mutex_init(&conn->lock);
+	INIT_LIST_HEAD(&conn->names_list);
+	INIT_LIST_HEAD(&conn->reply_list);
+	atomic_set(&conn->request_count, 0);
+	atomic_set(&conn->lost_count, 0);
+	INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work);
+	conn->cred = get_cred(file->f_cred);
+	conn->pid = get_pid(task_pid(current));
+	get_fs_root(current->fs, &conn->root_path);
+	init_waitqueue_head(&conn->wait);
+	kdbus_queue_init(&conn->queue);
+	conn->privileged = privileged;
+	conn->owner = owner;
+	conn->ep = kdbus_ep_ref(ep);
+	conn->id = atomic64_inc_return(&bus->domain->last_id);
+	conn->flags = hello->flags;
+	atomic64_set(&conn->attach_flags_send, attach_flags_send);
+	atomic64_set(&conn->attach_flags_recv, attach_flags_recv);
+	INIT_LIST_HEAD(&conn->monitor_entry);
+
+	if (conn_description) {
+		conn->description = kstrdup(conn_description, GFP_KERNEL);
+		if (!conn->description) {
+			ret = -ENOMEM;
+			goto exit_unref;
+		}
+	}
+
+	conn->pool = kdbus_pool_new(conn->description, hello->pool_size);
+	if (IS_ERR(conn->pool)) {
+		ret = PTR_ERR(conn->pool);
+		conn->pool = NULL;
+		goto exit_unref;
+	}
+
+	conn->match_db = kdbus_match_db_new();
+	if (IS_ERR(conn->match_db)) {
+		ret = PTR_ERR(conn->match_db);
+		conn->match_db = NULL;
+		goto exit_unref;
+	}
+
+	/* return properties of this connection to the caller */
+	hello->bus_flags = bus->bus_flags;
+	hello->id = conn->id;
+
+	BUILD_BUG_ON(sizeof(bus->id128) != sizeof(hello->id128));
+	memcpy(hello->id128, bus->id128, sizeof(hello->id128));
+
+	/* privileged processes can impersonate somebody else */
+	if (creds || pids || seclabel) {
+		conn->meta_fake = kdbus_meta_fake_new();
+		if (IS_ERR(conn->meta_fake)) {
+			ret = PTR_ERR(conn->meta_fake);
+			conn->meta_fake = NULL;
+			goto exit_unref;
+		}
+
+		ret = kdbus_meta_fake_collect(conn->meta_fake,
+					      creds, pids, seclabel);
+		if (ret < 0)
+			goto exit_unref;
+	} else {
+		conn->meta_proc = kdbus_meta_proc_new();
+		if (IS_ERR(conn->meta_proc)) {
+			ret = PTR_ERR(conn->meta_proc);
+			conn->meta_proc = NULL;
+			goto exit_unref;
+		}
+
+		ret = kdbus_meta_proc_collect(conn->meta_proc,
+					      KDBUS_ATTACH_CREDS |
+					      KDBUS_ATTACH_PIDS |
+					      KDBUS_ATTACH_AUXGROUPS |
+					      KDBUS_ATTACH_TID_COMM |
+					      KDBUS_ATTACH_PID_COMM |
+					      KDBUS_ATTACH_EXE |
+					      KDBUS_ATTACH_CMDLINE |
+					      KDBUS_ATTACH_CGROUP |
+					      KDBUS_ATTACH_CAPS |
+					      KDBUS_ATTACH_SECLABEL |
+					      KDBUS_ATTACH_AUDIT);
+		if (ret < 0)
+			goto exit_unref;
+	}
+
+	/*
+	 * Account the connection against the current user (UID), or for
+	 * custom endpoints use the anonymous user assigned to the endpoint.
+	 * Note that limits are always accounted against the real UID, not
+	 * the effective UID (cred->user always points to the accounting of
+	 * cred->uid, not cred->euid).
+	 * In case the caller is privileged, we allow changing the accounting
+	 * to the faked user.
+	 */
+	if (ep->user) {
+		conn->user = kdbus_user_ref(ep->user);
+	} else {
+		kuid_t uid;
+
+		if (conn->meta_fake && uid_valid(conn->meta_fake->uid) &&
+		    conn->privileged)
+			uid = conn->meta_fake->uid;
+		else
+			uid = conn->cred->uid;
+
+		conn->user = kdbus_user_lookup(ep->bus->domain, uid);
+		if (IS_ERR(conn->user)) {
+			ret = PTR_ERR(conn->user);
+			conn->user = NULL;
+			goto exit_unref;
+		}
+	}
+
+	if (atomic_inc_return(&conn->user->connections) > KDBUS_USER_MAX_CONN) {
+		/* decremented by destructor as conn->user is valid */
+		ret = -EMFILE;
+		goto exit_unref;
+	}
+
+	bloom_item.size = sizeof(bloom_item);
+	bloom_item.type = KDBUS_ITEM_BLOOM_PARAMETER;
+	bloom_item.bloom = bus->bloom;
+	kdbus_kvec_set(&kvec, &bloom_item, bloom_item.size, &items_size);
+
+	slice = kdbus_pool_slice_alloc(conn->pool, items_size, false);
+	if (IS_ERR(slice)) {
+		ret = PTR_ERR(slice);
+		slice = NULL;
+		goto exit_unref;
+	}
+
+	ret = kdbus_pool_slice_copy_kvec(slice, 0, &kvec, 1, items_size);
+	if (ret < 0)
+		goto exit_unref;
+
+	kdbus_pool_slice_publish(slice, &hello->offset, &hello->items_size);
+	kdbus_pool_slice_release(slice);
+
+	return conn;
+
+exit_unref:
+	kdbus_pool_slice_release(slice);
+	kdbus_conn_unref(conn);
+	return ERR_PTR(ret);
+}
+
+static void __kdbus_conn_free(struct kref *kref)
+{
+	struct kdbus_conn *conn = container_of(kref, struct kdbus_conn, kref);
+
+	WARN_ON(kdbus_conn_active(conn));
+	WARN_ON(delayed_work_pending(&conn->work));
+	WARN_ON(!list_empty(&conn->queue.msg_list));
+	WARN_ON(!list_empty(&conn->names_list));
+	WARN_ON(!list_empty(&conn->reply_list));
+
+	if (conn->user) {
+		atomic_dec(&conn->user->connections);
+		kdbus_user_unref(conn->user);
+	}
+
+	kdbus_meta_fake_free(conn->meta_fake);
+	kdbus_meta_proc_unref(conn->meta_proc);
+	kdbus_match_db_free(conn->match_db);
+	kdbus_pool_free(conn->pool);
+	kdbus_ep_unref(conn->ep);
+	path_put(&conn->root_path);
+	put_pid(conn->pid);
+	put_cred(conn->cred);
+	kfree(conn->description);
+	kfree(conn->quota);
+	kfree(conn);
+}
+
+/**
+ * kdbus_conn_ref() - take a connection reference
+ * @conn:		Connection, may be %NULL
+ *
+ * Return: the connection itself
+ */
+struct kdbus_conn *kdbus_conn_ref(struct kdbus_conn *conn)
+{
+	if (conn)
+		kref_get(&conn->kref);
+	return conn;
+}
+
+/**
+ * kdbus_conn_unref() - drop a connection reference
+ * @conn:		Connection, may be %NULL
+ *
+ * When the last reference is dropped, the connection's internal structure
+ * is freed.
+ *
+ * Return: NULL
+ */
+struct kdbus_conn *kdbus_conn_unref(struct kdbus_conn *conn)
+{
+	if (conn)
+		kref_put(&conn->kref, __kdbus_conn_free);
+	return NULL;
+}
+
+/**
+ * kdbus_conn_active() - connection is not disconnected
+ * @conn:		Connection to check
+ *
+ * Returns true if the connection has not been disconnected yet. Note that a
+ * connection might be disconnected asynchronously, unless you hold the
+ * connection lock. If that's not suitable for you, see kdbus_conn_acquire() to
+ * suppress connection shutdown for a short period.
+ *
+ * Return: true if the connection is still active
+ */
+bool kdbus_conn_active(const struct kdbus_conn *conn)
+{
+	return atomic_read(&conn->active) >= 0;
+}
+
+/**
+ * kdbus_conn_acquire() - acquire an active connection reference
+ * @conn:		Connection
+ *
+ * Users can close a connection via KDBUS_BYEBYE (or by destroying the
+ * endpoint/bus/...) at any time. Whenever this happens, we should deny any
+ * user-visible action on this connection and signal ECONNRESET instead.
+ * To avoid testing for connection availability every time you take the
+ * connection-lock, you can acquire a connection for short periods.
+ *
+ * By calling kdbus_conn_acquire(), you gain an "active reference" to the
+ * connection. You must also hold a regular reference at all times! As long as
+ * you hold the active-ref, the connection will not be shut down. However, if
+ * the connection was shut down, you can never acquire an active-ref again.
+ *
+ * kdbus_conn_disconnect() disables the connection and then waits for all active
+ * references to be dropped. It will also wake up any pending operation.
+ * However, you must not sleep for an indefinite period while holding an
+ * active-reference. Otherwise, kdbus_conn_disconnect() might stall. If you need
+ * to sleep for an indefinite period, either release the reference and try to
+ * acquire it again after waking up, or make kdbus_conn_disconnect() wake up
+ * your wait-queue.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_conn_acquire(struct kdbus_conn *conn)
+{
+	if (!atomic_inc_unless_negative(&conn->active))
+		return -ECONNRESET;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	rwsem_acquire_read(&conn->dep_map, 0, 1, _RET_IP_);
+#endif
+
+	return 0;
+}
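+
+/*
+ * Minimal usage sketch (illustrative only, not part of the original code):
+ * guard a short, non-blocking operation on a connection against
+ * asynchronous shutdown:
+ *
+ *	ret = kdbus_conn_acquire(conn);
+ *	if (ret < 0)
+ *		return ret;	(the connection was already reset)
+ *
+ *	... short work on conn, no indefinite sleeps ...
+ *
+ *	kdbus_conn_release(conn);
+ */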
+
+/**
+ * kdbus_conn_release() - release an active connection reference
+ * @conn:		Connection
+ *
+ * This releases an active reference that has been acquired via
+ * kdbus_conn_acquire(). If the connection was already disabled and this is the
+ * last active-ref that is dropped, the disconnect-waiter will be woken up and
+ * properly close the connection.
+ */
+void kdbus_conn_release(struct kdbus_conn *conn)
+{
+	int v;
+
+	if (!conn)
+		return;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	rwsem_release(&conn->dep_map, 1, _RET_IP_);
+#endif
+
+	v = atomic_dec_return(&conn->active);
+	if (v != KDBUS_CONN_ACTIVE_BIAS)
+		return;
+
+	wake_up_all(&conn->wait);
+}
+
+static int kdbus_conn_connect(struct kdbus_conn *conn, const char *name)
+{
+	struct kdbus_ep *ep = conn->ep;
+	struct kdbus_bus *bus = ep->bus;
+	int ret;
+
+	if (WARN_ON(atomic_read(&conn->active) != KDBUS_CONN_ACTIVE_NEW))
+		return -EALREADY;
+
+	/* make sure the ep-node is active while we add our connection */
+	if (!kdbus_node_acquire(&ep->node))
+		return -ESHUTDOWN;
+
+	/* lock order: domain -> bus -> ep -> names -> conn */
+	mutex_lock(&ep->lock);
+	down_write(&bus->conn_rwlock);
+
+	/* link into monitor list */
+	if (kdbus_conn_is_monitor(conn))
+		list_add_tail(&conn->monitor_entry, &bus->monitors_list);
+
+	/* link into bus and endpoint */
+	list_add_tail(&conn->ep_entry, &ep->conn_list);
+	hash_add(bus->conn_hash, &conn->hentry, conn->id);
+
+	/* enable lookups and acquire active ref */
+	atomic_set(&conn->active, 1);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	rwsem_acquire_read(&conn->dep_map, 0, 1, _RET_IP_);
+#endif
+
+	up_write(&bus->conn_rwlock);
+	mutex_unlock(&ep->lock);
+
+	kdbus_node_release(&ep->node);
+
+	/*
+	 * Notify subscribers about the new active connection, unless it is
+	 * a monitor. Monitors are invisible on the bus, can't be addressed
+	 * directly, and won't cause any notifications.
+	 */
+	if (!kdbus_conn_is_monitor(conn)) {
+		ret = kdbus_notify_id_change(bus, KDBUS_ITEM_ID_ADD,
+					     conn->id, conn->flags);
+		if (ret < 0)
+			goto exit_disconnect;
+	}
+
+	if (kdbus_conn_is_activator(conn)) {
+		u64 flags = KDBUS_NAME_ACTIVATOR;
+
+		if (WARN_ON(!name)) {
+			ret = -EINVAL;
+			goto exit_disconnect;
+		}
+
+		ret = kdbus_name_acquire(bus->name_registry, conn, name,
+					 flags, NULL);
+		if (ret < 0)
+			goto exit_disconnect;
+	}
+
+	kdbus_conn_release(conn);
+	kdbus_notify_flush(bus);
+	return 0;
+
+exit_disconnect:
+	kdbus_conn_release(conn);
+	kdbus_conn_disconnect(conn, false);
+	return ret;
+}
+
+/**
+ * kdbus_conn_disconnect() - disconnect a connection
+ * @conn:		The connection to disconnect
+ * @ensure_queue_empty:	Flag to indicate if the call should fail in
+ *			case the connection's message list is not
+ *			empty
+ *
+ * If @ensure_queue_empty is true, and the connection has pending messages,
+ * -EBUSY is returned.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty)
+{
+	struct kdbus_queue_entry *entry, *tmp;
+	struct kdbus_bus *bus = conn->ep->bus;
+	struct kdbus_reply *r, *r_tmp;
+	struct kdbus_conn *c;
+	int i, v;
+
+	mutex_lock(&conn->lock);
+	v = atomic_read(&conn->active);
+	if (v == KDBUS_CONN_ACTIVE_NEW) {
+		/* was never connected */
+		mutex_unlock(&conn->lock);
+		return 0;
+	}
+	if (v < 0) {
+		/* already dead */
+		mutex_unlock(&conn->lock);
+		return -ECONNRESET;
+	}
+	if (ensure_queue_empty && !list_empty(&conn->queue.msg_list)) {
+		/* still busy */
+		mutex_unlock(&conn->lock);
+		return -EBUSY;
+	}
+
+	atomic_add(KDBUS_CONN_ACTIVE_BIAS, &conn->active);
+	mutex_unlock(&conn->lock);
+
+	wake_up_interruptible(&conn->wait);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	rwsem_acquire(&conn->dep_map, 0, 0, _RET_IP_);
+	if (atomic_read(&conn->active) != KDBUS_CONN_ACTIVE_BIAS)
+		lock_contended(&conn->dep_map, _RET_IP_);
+#endif
+
+	wait_event(conn->wait,
+		   atomic_read(&conn->active) == KDBUS_CONN_ACTIVE_BIAS);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	lock_acquired(&conn->dep_map, _RET_IP_);
+	rwsem_release(&conn->dep_map, 1, _RET_IP_);
+#endif
+
+	cancel_delayed_work_sync(&conn->work);
+	kdbus_policy_remove_owner(&conn->ep->bus->policy_db, conn);
+
+	/* lock order: domain -> bus -> ep -> names -> conn */
+	mutex_lock(&conn->ep->lock);
+	down_write(&bus->conn_rwlock);
+
+	/* remove from bus and endpoint */
+	hash_del(&conn->hentry);
+	list_del(&conn->monitor_entry);
+	list_del(&conn->ep_entry);
+
+	up_write(&bus->conn_rwlock);
+	mutex_unlock(&conn->ep->lock);
+
+	/*
+	 * Remove all names associated with this connection; this possibly
+	 * moves queued messages back to the activator connection.
+	 */
+	kdbus_name_release_all(bus->name_registry, conn);
+
+	/* if we die while other connections wait for our reply, notify them */
+	mutex_lock(&conn->lock);
+	list_for_each_entry_safe(entry, tmp, &conn->queue.msg_list, entry) {
+		if (entry->reply)
+			kdbus_notify_reply_dead(bus,
+						entry->reply->reply_dst->id,
+						entry->reply->cookie);
+		kdbus_queue_entry_free(entry);
+	}
+
+	list_for_each_entry_safe(r, r_tmp, &conn->reply_list, entry)
+		kdbus_reply_unlink(r);
+	mutex_unlock(&conn->lock);
+
+	/* lock order: domain -> bus -> ep -> names -> conn */
+	down_read(&bus->conn_rwlock);
+	hash_for_each(bus->conn_hash, i, c, hentry) {
+		mutex_lock(&c->lock);
+		list_for_each_entry_safe(r, r_tmp, &c->reply_list, entry) {
+			if (r->reply_src != conn)
+				continue;
+
+			if (r->sync)
+				kdbus_sync_reply_wakeup(r, -EPIPE);
+			else
+				/* send a 'connection dead' notification */
+				kdbus_notify_reply_dead(bus, c->id, r->cookie);
+
+			kdbus_reply_unlink(r);
+		}
+		mutex_unlock(&c->lock);
+	}
+	up_read(&bus->conn_rwlock);
+
+	if (!kdbus_conn_is_monitor(conn))
+		kdbus_notify_id_change(bus, KDBUS_ITEM_ID_REMOVE,
+				       conn->id, conn->flags);
+
+	kdbus_notify_flush(bus);
+
+	return 0;
+}
+
+/**
+ * kdbus_conn_has_name() - check if a connection owns a name
+ * @conn:		Connection
+ * @name:		Well-known name to check for
+ *
+ * The caller must hold the registry lock of conn->ep->bus.
+ *
+ * Return: true if the name is currently owned by the connection
+ */
+bool kdbus_conn_has_name(struct kdbus_conn *conn, const char *name)
+{
+	struct kdbus_name_owner *owner;
+
+	lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
+
+	list_for_each_entry(owner, &conn->names_list, conn_entry)
+		if (!(owner->flags & KDBUS_NAME_IN_QUEUE) &&
+		    !strcmp(name, owner->name->name))
+			return true;
+
+	return false;
+}
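+
+/*
+ * Call pattern sketch (illustrative only; "org.example.Name" is a made-up
+ * name): callers take the registry lock of the bus around the query:
+ *
+ *	down_read(&conn->ep->bus->name_registry->rwlock);
+ *	owns = kdbus_conn_has_name(conn, "org.example.Name");
+ *	up_read(&conn->ep->bus->name_registry->rwlock);
+ */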
+
+struct kdbus_quota {
+	u32 memory;
+	u16 msgs;
+	u8 fds;
+};
+
+/**
+ * kdbus_conn_quota_inc() - increase quota accounting
+ * @c:		connection owning the quota tracking
+ * @u:		user to account for (or NULL for kernel accounting)
+ * @memory:	size of memory to account for
+ * @fds:	number of FDs to account for
+ *
+ * This call manages the quotas of connection @c. That is, it's used when other
+ * users want to use the resources of connection @c, which so far only concerns
+ * the receive queue of the destination.
+ *
+ * This increases the quota-accounting for user @u by @memory bytes and @fds
+ * file descriptors. If the user has already reached the quota limits, this call
+ * will not do any accounting but return a negative error code indicating the
+ * failure.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_conn_quota_inc(struct kdbus_conn *c, struct kdbus_user *u,
+			 size_t memory, size_t fds)
+{
+	struct kdbus_quota *quota;
+	size_t available, accounted;
+	unsigned int id;
+
+	/*
+	 * Pool Layout:
+	 * 50% of a pool is always owned by the connection. It is reserved for
+	 * kernel queries, handling received messages and other tasks that are
+	 * under control of the pool owner. The other 50% of the pool are used
+	 * as incoming queue.
+	 * As we optionally support user-space based policies, we need fair
+	 * allocation schemes. Furthermore, resource utilization should be
+	 * maximized, so only minimal resources stay reserved. However, we need
+	 * to adapt to a dynamic number of users, as we cannot know how many
+	 * users will talk to a connection. Therefore, the current allocation
+	 * works like this:
+	 * We limit the number of bytes in a destination's pool per sending
+	 * user. The space available for a user is 33% of the unused pool space
+	 * (whereas the space used by the user itself is also treated as
+	 * 'unused'). This way, we favor users coming first, but keep enough
+	 * pool space available for any following users. Given that messages are
+	 * dequeued in FIFO order, this should balance nicely if the number of
+	 * users grows. At the same time, this algorithm guarantees that the
+	 * space available to a connection is reduced dynamically, the more
+	 * concurrent users talk to a connection.
+	 */
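+	/*
+	 * Illustrative numbers (an assumption, not taken from the code):
+	 * with a 64 KiB pool, the incoming queue gets 32 KiB. If 16 KiB are
+	 * already accounted across all senders and this sender holds 4 KiB
+	 * of that, its ceiling becomes (32K - 16K + 4K) / 3 ~= 6.7 KiB, so
+	 * roughly 2.7 KiB of headroom remain for its next message.
+	 */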
+
+	/* per user-accounting is expensive, so we keep state small */
+	BUILD_BUG_ON(sizeof(quota->memory) != 4);
+	BUILD_BUG_ON(sizeof(quota->msgs) != 2);
+	BUILD_BUG_ON(sizeof(quota->fds) != 1);
+	BUILD_BUG_ON(KDBUS_CONN_MAX_MSGS > U16_MAX);
+	BUILD_BUG_ON(KDBUS_CONN_MAX_FDS_PER_USER > U8_MAX);
+
+	id = u ? u->id : KDBUS_USER_KERNEL_ID;
+	if (id >= c->n_quota) {
+		unsigned int users;
+
+		users = max(KDBUS_ALIGN8(id) + 8, id);
+		quota = krealloc(c->quota, users * sizeof(*quota),
+				 GFP_KERNEL | __GFP_ZERO);
+		if (!quota)
+			return -ENOMEM;
+
+		c->n_quota = users;
+		c->quota = quota;
+	}
+
+	quota = &c->quota[id];
+	kdbus_pool_accounted(c->pool, &available, &accounted);
+
+	/* half the pool is _always_ reserved for the pool owner */
+	available /= 2;
+
+	/*
+	 * Pool owner slices are un-accounted slices; they can claim more
+	 * than 50% of the queue. However, the slices we're dealing with here
+	 * belong to the incoming queue, hence they are 'accounted' slices
+	 * to which the 50%-limit applies.
+	 */
+	if (available < accounted)
+		return -ENOBUFS;
+
+	/* 1/3 of the remaining space (including your own memory) */
+	available = (available - accounted + quota->memory) / 3;
+
+	if (available < quota->memory ||
+	    available - quota->memory < memory ||
+	    quota->memory + memory > U32_MAX)
+		return -ENOBUFS;
+	if (quota->msgs >= KDBUS_CONN_MAX_MSGS)
+		return -ENOBUFS;
+	if (quota->fds + fds < quota->fds ||
+	    quota->fds + fds > KDBUS_CONN_MAX_FDS_PER_USER)
+		return -EMFILE;
+
+	quota->memory += memory;
+	quota->fds += fds;
+	++quota->msgs;
+	return 0;
+}
+
+/**
+ * kdbus_conn_quota_dec() - decrease quota accounting
+ * @c:		connection owning the quota tracking
+ * @u:		user which was accounted for (or NULL for kernel accounting)
+ * @memory:	size of memory which was accounted for
+ * @fds:	number of FDs which were accounted for
+ *
+ * This does the reverse of kdbus_conn_quota_inc(). You have to release any
+ * accounted resources that you called kdbus_conn_quota_inc() for. However, you
+ * must not call kdbus_conn_quota_dec() if the accounting failed (that is,
+ * kdbus_conn_quota_inc() failed).
+ */
+void kdbus_conn_quota_dec(struct kdbus_conn *c, struct kdbus_user *u,
+			  size_t memory, size_t fds)
+{
+	struct kdbus_quota *quota;
+	unsigned int id;
+
+	id = u ? u->id : KDBUS_USER_KERNEL_ID;
+	if (WARN_ON(id >= c->n_quota))
+		return;
+
+	quota = &c->quota[id];
+
+	if (!WARN_ON(quota->msgs == 0))
+		--quota->msgs;
+	if (!WARN_ON(quota->memory < memory))
+		quota->memory -= memory;
+	if (!WARN_ON(quota->fds < fds))
+		quota->fds -= fds;
+}
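+
+/*
+ * Pairing sketch (illustrative only, not part of the original code):
+ * kdbus_conn_quota_dec() reverses a successful kdbus_conn_quota_inc();
+ * a failed _inc() must not be undone:
+ *
+ *	ret = kdbus_conn_quota_inc(c, u, size, nfds);
+ *	if (ret < 0)
+ *		return ret;	(nothing was accounted)
+ *
+ *	... later, when the entry is released or queuing fails ...
+ *
+ *	kdbus_conn_quota_dec(c, u, size, nfds);
+ */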
+
+/**
+ * kdbus_conn_lost_message() - handle lost messages
+ * @c:		connection that lost a message
+ *
+ * kdbus is reliable. That means we try hard to never lose messages. However,
+ * memory is limited, so we cannot rely on transmissions to never fail.
+ * Therefore, we use quota-limits to let callers know if their unicast message
+ * cannot be transmitted to a peer. This works fine for unicasts, but for
+ * broadcasts we cannot make the caller handle the transmission failure.
+ * Instead, we must let the destination know that it couldn't receive a
+ * broadcast.
+ * As this is an unlikely scenario, we keep it simple. A single lost-counter
+ * remembers the number of lost messages since the last call to RECV. The next
+ * message retrieval will notify the connection that it lost messages since the
+ * last message retrieval and thus should resync its state.
+ */
+void kdbus_conn_lost_message(struct kdbus_conn *c)
+{
+	if (atomic_inc_return(&c->lost_count) == 1)
+		wake_up_interruptible(&c->wait);
+}
+
+/* Callers should take the conn_dst lock */
+static struct kdbus_queue_entry *
+kdbus_conn_entry_make(struct kdbus_conn *conn_src,
+		      struct kdbus_conn *conn_dst,
+		      struct kdbus_staging *staging)
+{
+	/* The remote connection was disconnected */
+	if (!kdbus_conn_active(conn_dst))
+		return ERR_PTR(-ECONNRESET);
+
+	/*
+	 * If the connection does not accept file descriptors but the message
+	 * has some attached, refuse it.
+	 *
+	 * If this is a monitor connection, accept the message. In that
+	 * case, all file descriptors will be set to -1 at receive time.
+	 */
+	if (!kdbus_conn_is_monitor(conn_dst) &&
+	    !(conn_dst->flags & KDBUS_HELLO_ACCEPT_FD) &&
+	    staging->gaps && staging->gaps->n_fds > 0)
+		return ERR_PTR(-ECOMM);
+
+	return kdbus_queue_entry_new(conn_src, conn_dst, staging);
+}
+
+/*
+ * When synchronously responding to a message, allocate a queue entry
+ * and attach it to the reply tracking object.
+ * The connection's queue will never get to see it.
+ */
+static int kdbus_conn_entry_sync_attach(struct kdbus_conn *conn_dst,
+					struct kdbus_staging *staging,
+					struct kdbus_reply *reply_wake)
+{
+	struct kdbus_queue_entry *entry;
+	int remote_ret, ret = 0;
+
+	mutex_lock(&reply_wake->reply_dst->lock);
+
+	/*
+	 * If we are still waiting then proceed, allocate a queue
+	 * entry and attach it to the reply object
+	 */
+	if (reply_wake->waiting) {
+		entry = kdbus_conn_entry_make(reply_wake->reply_src, conn_dst,
+					      staging);
+		if (IS_ERR(entry))
+			ret = PTR_ERR(entry);
+		else
+			/* Attach the entry to the reply object */
+			reply_wake->queue_entry = entry;
+	} else {
+		ret = -ECONNRESET;
+	}
+
+	/*
+	 * Update the reply object and wake up the remote peer only
+	 * on appropriate return codes:
+	 *
+	 * * -ECOMM: if the replying connection failed with -ECOMM,
+	 *           wake up the remote peer with -EREMOTEIO.
+	 *
+	 *           We do this to differentiate between -ECOMM errors
+	 *           from the original sender's perspective:
+	 *           an -ECOMM error during the sync send versus an
+	 *           -ECOMM error during the sync reply; the latter
+	 *           is rewritten to -EREMOTEIO.
+	 *
+	 * * Wake up on all other return codes.
+	 */
+	remote_ret = ret;
+
+	if (ret == -ECOMM)
+		remote_ret = -EREMOTEIO;
+
+	kdbus_sync_reply_wakeup(reply_wake, remote_ret);
+	kdbus_reply_unlink(reply_wake);
+	mutex_unlock(&reply_wake->reply_dst->lock);
+
+	return ret;
+}
+
+/**
+ * kdbus_conn_entry_insert() - enqueue a message into the receiver's pool
+ * @conn_src:		The sending connection
+ * @conn_dst:		The connection to queue into
+ * @staging:		Message to send
+ * @reply:		The reply tracker to attach to the queue entry
+ * @name:		Destination name this msg is sent to, or NULL
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int kdbus_conn_entry_insert(struct kdbus_conn *conn_src,
+			    struct kdbus_conn *conn_dst,
+			    struct kdbus_staging *staging,
+			    struct kdbus_reply *reply,
+			    const struct kdbus_name_entry *name)
+{
+	struct kdbus_queue_entry *entry;
+	int ret;
+
+	kdbus_conn_lock2(conn_src, conn_dst);
+
+	entry = kdbus_conn_entry_make(conn_src, conn_dst, staging);
+	if (IS_ERR(entry)) {
+		ret = PTR_ERR(entry);
+		goto exit_unlock;
+	}
+
+	if (reply) {
+		kdbus_reply_link(reply);
+		if (!reply->sync)
+			schedule_delayed_work(&conn_src->work, 0);
+	}
+
+	/*
+	 * Record the sequence number of the registered name; it will
+	 * be remembered by the queue, in case messages addressed to a
+	 * name need to be moved from or to an activator.
+	 */
+	if (name)
+		entry->dst_name_id = name->name_id;
+
+	kdbus_queue_entry_enqueue(entry, reply);
+	wake_up_interruptible(&conn_dst->wait);
+
+	ret = 0;
+
+exit_unlock:
+	kdbus_conn_unlock2(conn_src, conn_dst);
+	return ret;
+}
+
+static int kdbus_conn_wait_reply(struct kdbus_conn *conn_src,
+				 struct kdbus_cmd_send *cmd_send,
+				 struct file *ioctl_file,
+				 struct file *cancel_fd,
+				 struct kdbus_reply *reply_wait,
+				 ktime_t expire)
+{
+	struct kdbus_queue_entry *entry;
+	struct poll_wqueues pwq = {};
+	int ret;
+
+	if (WARN_ON(!reply_wait))
+		return -EIO;
+
+	/*
+	 * Block until the reply arrives. reply_wait is left untouched
+	 * by the timeout scans that might be conducted for other,
+	 * asynchronous replies of conn_src.
+	 */
+
+	poll_initwait(&pwq);
+	poll_wait(ioctl_file, &conn_src->wait, &pwq.pt);
+
+	for (;;) {
+		/*
+		 * Any of the following conditions will stop our synchronously
+		 * blocking SEND command:
+		 *
+		 * a) The origin sender closed its connection
+		 * b) The remote peer answered, setting reply_wait->waiting = 0
+		 * c) The cancel FD was written to
+		 * d) A signal was received
+		 * e) The specified timeout was reached, and none of the above
+		 *    conditions kicked in.
+		 */
+
+		/*
+		 * We have already acquired an active reference when
+		 * entering here, but another thread may call
+		 * KDBUS_CMD_BYEBYE which does not acquire an active
+		 * reference, therefore kdbus_conn_disconnect() will
+		 * not wait for us.
+		 */
+		if (!kdbus_conn_active(conn_src)) {
+			ret = -ECONNRESET;
+			break;
+		}
+
+		/*
+		 * After the replying peer has unset the waiting variable,
+		 * it will wake us up.
+		 */
+		if (!reply_wait->waiting) {
+			ret = reply_wait->err;
+			break;
+		}
+
+		if (cancel_fd) {
+			unsigned int r;
+
+			r = cancel_fd->f_op->poll(cancel_fd, &pwq.pt);
+			if (r & POLLIN) {
+				ret = -ECANCELED;
+				break;
+			}
+		}
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (!poll_schedule_timeout(&pwq, TASK_INTERRUPTIBLE,
+					   &expire, 0)) {
+			ret = -ETIMEDOUT;
+			break;
+		}
+
+		/*
+		 * Reset the poll worker func, so the waitqueues are not
+		 * added to the poll table again. We just reuse what we've
+		 * collected earlier for further iterations.
+		 */
+		init_poll_funcptr(&pwq.pt, NULL);
+	}
+
+	poll_freewait(&pwq);
+
+	if (ret == -EINTR) {
+		/*
+		 * Interrupted system call. Unref the reply object, and pass
+		 * the return value down the chain. Mark the reply as
+		 * interrupted, so the cleanup work can remove it, but do not
+		 * unlink it from the list. Once the syscall restarts, we'll
+		 * pick it up and wait on it again.
+		 */
+		mutex_lock(&conn_src->lock);
+		reply_wait->interrupted = true;
+		schedule_delayed_work(&conn_src->work, 0);
+		mutex_unlock(&conn_src->lock);
+
+		return -ERESTARTSYS;
+	}
+
+	mutex_lock(&conn_src->lock);
+	reply_wait->waiting = false;
+	entry = reply_wait->queue_entry;
+	if (entry) {
+		ret = kdbus_queue_entry_install(entry,
+						&cmd_send->reply.return_flags,
+						true);
+		kdbus_pool_slice_publish(entry->slice, &cmd_send->reply.offset,
+					 &cmd_send->reply.msg_size);
+		kdbus_queue_entry_free(entry);
+	}
+	kdbus_reply_unlink(reply_wait);
+	mutex_unlock(&conn_src->lock);
+
+	return ret;
+}
+
+static int kdbus_pin_dst(struct kdbus_bus *bus,
+			 struct kdbus_staging *staging,
+			 struct kdbus_name_entry **out_name,
+			 struct kdbus_conn **out_dst)
+{
+	const struct kdbus_msg *msg = staging->msg;
+	struct kdbus_name_owner *owner = NULL;
+	struct kdbus_name_entry *name = NULL;
+	struct kdbus_conn *dst = NULL;
+	int ret;
+
+	lockdep_assert_held(&bus->name_registry->rwlock);
+
+	if (!staging->dst_name) {
+		dst = kdbus_bus_find_conn_by_id(bus, msg->dst_id);
+		if (!dst)
+			return -ENXIO;
+
+		if (!kdbus_conn_is_ordinary(dst)) {
+			ret = -ENXIO;
+			goto error;
+		}
+	} else {
+		name = kdbus_name_lookup_unlocked(bus->name_registry,
+						  staging->dst_name);
+		if (name)
+			owner = kdbus_name_get_owner(name);
+		if (!owner)
+			return -ESRCH;
+
+		/*
+		 * If both a name and a connection ID are given as destination
+		 * of a message, check that the currently owning connection of
+		 * the name matches the specified ID.
+		 * This way, we allow userspace to send the message to a
+		 * specific connection by ID only if the connection currently
+		 * owns the given name.
+		 */
+		if (msg->dst_id != KDBUS_DST_ID_NAME &&
+		    msg->dst_id != owner->conn->id)
+			return -EREMCHG;
+
+		if ((msg->flags & KDBUS_MSG_NO_AUTO_START) &&
+		    kdbus_conn_is_activator(owner->conn))
+			return -EADDRNOTAVAIL;
+
+		dst = kdbus_conn_ref(owner->conn);
+	}
+
+	*out_name = name;
+	*out_dst = dst;
+	return 0;
+
+error:
+	kdbus_conn_unref(dst);
+	return ret;
+}
+
+static int kdbus_conn_reply(struct kdbus_conn *src,
+			    struct kdbus_staging *staging)
+{
+	const struct kdbus_msg *msg = staging->msg;
+	struct kdbus_name_entry *name = NULL;
+	struct kdbus_reply *reply, *wake = NULL;
+	struct kdbus_conn *dst = NULL;
+	struct kdbus_bus *bus = src->ep->bus;
+	int ret;
+
+	if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+	    WARN_ON(msg->flags & KDBUS_MSG_EXPECT_REPLY) ||
+	    WARN_ON(msg->flags & KDBUS_MSG_SIGNAL))
+		return -EINVAL;
+
+	/* name-registry must be locked for lookup *and* collecting data */
+	down_read(&bus->name_registry->rwlock);
+
+	/* find and pin destination */
+
+	ret = kdbus_pin_dst(bus, staging, &name, &dst);
+	if (ret < 0)
+		goto exit;
+
+	mutex_lock(&dst->lock);
+	reply = kdbus_reply_find(src, dst, msg->cookie_reply);
+	if (reply) {
+		if (reply->sync)
+			wake = kdbus_reply_ref(reply);
+		kdbus_reply_unlink(reply);
+	}
+	mutex_unlock(&dst->lock);
+
+	if (!reply) {
+		ret = -EBADSLT;
+		goto exit;
+	}
+
+	/* send message */
+
+	kdbus_bus_eavesdrop(bus, src, staging);
+
+	if (wake)
+		ret = kdbus_conn_entry_sync_attach(dst, staging, wake);
+	else
+		ret = kdbus_conn_entry_insert(src, dst, staging, NULL, name);
+
+exit:
+	up_read(&bus->name_registry->rwlock);
+	kdbus_reply_unref(wake);
+	kdbus_conn_unref(dst);
+	return ret;
+}
+
+static struct kdbus_reply *kdbus_conn_call(struct kdbus_conn *src,
+					   struct kdbus_staging *staging,
+					   ktime_t exp)
+{
+	const struct kdbus_msg *msg = staging->msg;
+	struct kdbus_name_entry *name = NULL;
+	struct kdbus_reply *wait = NULL;
+	struct kdbus_conn *dst = NULL;
+	struct kdbus_bus *bus = src->ep->bus;
+	int ret;
+
+	if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+	    WARN_ON(msg->flags & KDBUS_MSG_SIGNAL) ||
+	    WARN_ON(!(msg->flags & KDBUS_MSG_EXPECT_REPLY)))
+		return ERR_PTR(-EINVAL);
+
+	/* resume previous wait-context, if available */
+
+	mutex_lock(&src->lock);
+	wait = kdbus_reply_find(NULL, src, msg->cookie);
+	if (wait) {
+		if (wait->interrupted) {
+			kdbus_reply_ref(wait);
+			wait->interrupted = false;
+		} else {
+			wait = NULL;
+		}
+	}
+	mutex_unlock(&src->lock);
+
+	if (wait)
+		return wait;
+
+	if (ktime_compare(ktime_get(), exp) >= 0)
+		return ERR_PTR(-ETIMEDOUT);
+
+	/* name-registry must be locked for lookup *and* collecting data */
+	down_read(&bus->name_registry->rwlock);
+
+	/* find and pin destination */
+
+	ret = kdbus_pin_dst(bus, staging, &name, &dst);
+	if (ret < 0)
+		goto exit;
+
+	if (!kdbus_conn_policy_talk(src, current_cred(), dst)) {
+		ret = -EPERM;
+		goto exit;
+	}
+
+	wait = kdbus_reply_new(dst, src, msg, name, true);
+	if (IS_ERR(wait)) {
+		ret = PTR_ERR(wait);
+		wait = NULL;
+		goto exit;
+	}
+
+	/* send message */
+
+	kdbus_bus_eavesdrop(bus, src, staging);
+
+	ret = kdbus_conn_entry_insert(src, dst, staging, wait, name);
+	if (ret < 0)
+		goto exit;
+
+	ret = 0;
+
+exit:
+	up_read(&bus->name_registry->rwlock);
+	if (ret < 0) {
+		kdbus_reply_unref(wait);
+		wait = ERR_PTR(ret);
+	}
+	kdbus_conn_unref(dst);
+	return wait;
+}
+
+static int kdbus_conn_unicast(struct kdbus_conn *src,
+			      struct kdbus_staging *staging)
+{
+	const struct kdbus_msg *msg = staging->msg;
+	struct kdbus_name_entry *name = NULL;
+	struct kdbus_reply *wait = NULL;
+	struct kdbus_conn *dst = NULL;
+	struct kdbus_bus *bus = src->ep->bus;
+	bool is_signal = (msg->flags & KDBUS_MSG_SIGNAL);
+	int ret = 0;
+
+	if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+	    WARN_ON(!(msg->flags & KDBUS_MSG_EXPECT_REPLY) &&
+		    msg->cookie_reply != 0))
+		return -EINVAL;
+
+	/* name-registry must be locked for lookup *and* collecting data */
+	down_read(&bus->name_registry->rwlock);
+
+	/* find and pin destination */
+
+	ret = kdbus_pin_dst(bus, staging, &name, &dst);
+	if (ret < 0)
+		goto exit;
+
+	if (is_signal) {
+		/* as with broadcasts, we eavesdrop even if the msg is dropped */
+		kdbus_bus_eavesdrop(bus, src, staging);
+
+		/* drop silently if peer is not interested or not privileged */
+		if (!kdbus_match_db_match_msg(dst->match_db, src, staging) ||
+		    !kdbus_conn_policy_talk(dst, NULL, src))
+			goto exit;
+	} else if (!kdbus_conn_policy_talk(src, current_cred(), dst)) {
+		ret = -EPERM;
+		goto exit;
+	} else if (msg->flags & KDBUS_MSG_EXPECT_REPLY) {
+		wait = kdbus_reply_new(dst, src, msg, name, false);
+		if (IS_ERR(wait)) {
+			ret = PTR_ERR(wait);
+			wait = NULL;
+			goto exit;
+		}
+	}
+
+	/* send message */
+
+	if (!is_signal)
+		kdbus_bus_eavesdrop(bus, src, staging);
+
+	ret = kdbus_conn_entry_insert(src, dst, staging, wait, name);
+	if (ret < 0 && !is_signal)
+		goto exit;
+
+	/* signals are treated like broadcasts, recv-errors are ignored */
+	ret = 0;
+
+exit:
+	up_read(&bus->name_registry->rwlock);
+	kdbus_reply_unref(wait);
+	kdbus_conn_unref(dst);
+	return ret;
+}
+
+/**
+ * kdbus_conn_move_messages() - move messages from one connection to another
+ * @conn_dst:		Connection to copy to
+ * @conn_src:		Connection to copy from
+ * @name_id:		Filter for the sequence number of the registered
+ *			name, 0 means no filtering.
+ *
+ * Move all messages from one connection to another. This is used when
+ * an implementer connection is taking over/giving back a well-known name
+ * from/to an activator connection.
+ */
+void kdbus_conn_move_messages(struct kdbus_conn *conn_dst,
+			      struct kdbus_conn *conn_src,
+			      u64 name_id)
+{
+	struct kdbus_queue_entry *e, *e_tmp;
+	struct kdbus_reply *r, *r_tmp;
+	struct kdbus_bus *bus;
+	struct kdbus_conn *c;
+	LIST_HEAD(msg_list);
+	int i, ret = 0;
+
+	if (WARN_ON(conn_src == conn_dst))
+		return;
+
+	bus = conn_src->ep->bus;
+
+	/* lock order: domain -> bus -> ep -> names -> conn */
+	down_read(&bus->conn_rwlock);
+	hash_for_each(bus->conn_hash, i, c, hentry) {
+		if (c == conn_src || c == conn_dst)
+			continue;
+
+		mutex_lock(&c->lock);
+		list_for_each_entry_safe(r, r_tmp, &c->reply_list, entry) {
+			if (r->reply_src != conn_src)
+				continue;
+
+			/* filter messages for a specific name */
+			if (name_id > 0 && r->name_id != name_id)
+				continue;
+
+			kdbus_conn_unref(r->reply_src);
+			r->reply_src = kdbus_conn_ref(conn_dst);
+		}
+		mutex_unlock(&c->lock);
+	}
+	up_read(&bus->conn_rwlock);
+
+	kdbus_conn_lock2(conn_src, conn_dst);
+	list_for_each_entry_safe(e, e_tmp, &conn_src->queue.msg_list, entry) {
+		/* filter messages for a specific name */
+		if (name_id > 0 && e->dst_name_id != name_id)
+			continue;
+
+		if (!(conn_dst->flags & KDBUS_HELLO_ACCEPT_FD) &&
+		    e->gaps && e->gaps->n_fds > 0) {
+			kdbus_conn_lost_message(conn_dst);
+			kdbus_queue_entry_free(e);
+			continue;
+		}
+
+		ret = kdbus_queue_entry_move(e, conn_dst);
+		if (ret < 0) {
+			kdbus_conn_lost_message(conn_dst);
+			kdbus_queue_entry_free(e);
+			continue;
+		}
+	}
+	kdbus_conn_unlock2(conn_src, conn_dst);
+
+	/* wake up poll() */
+	wake_up_interruptible(&conn_dst->wait);
+}
+
+/* query the policy-database for all names of @whom */
+static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn,
+					const struct cred *conn_creds,
+					struct kdbus_policy_db *db,
+					struct kdbus_conn *whom,
+					unsigned int access)
+{
+	struct kdbus_name_owner *owner;
+	bool pass = false;
+	int res;
+
+	lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
+
+	down_read(&db->entries_rwlock);
+	mutex_lock(&whom->lock);
+
+	list_for_each_entry(owner, &whom->names_list, conn_entry) {
+		if (owner->flags & KDBUS_NAME_IN_QUEUE)
+			continue;
+
+		res = kdbus_policy_query_unlocked(db,
+					conn_creds ? : conn->cred,
+					owner->name->name,
+					kdbus_strhash(owner->name->name));
+		if (res >= (int)access) {
+			pass = true;
+			break;
+		}
+	}
+
+	mutex_unlock(&whom->lock);
+	up_read(&db->entries_rwlock);
+
+	return pass;
+}
+
+/**
+ * kdbus_conn_policy_own_name() - verify a connection can own the given name
+ * @conn:		Connection
+ * @conn_creds:		Credentials of @conn to use for policy check
+ * @name:		Name
+ *
+ * This verifies that @conn is allowed to acquire the well-known name @name.
+ *
+ * Return: true if allowed, false if not.
+ */
+bool kdbus_conn_policy_own_name(struct kdbus_conn *conn,
+				const struct cred *conn_creds,
+				const char *name)
+{
+	unsigned int hash = kdbus_strhash(name);
+	int res;
+
+	if (!conn_creds)
+		conn_creds = conn->cred;
+
+	if (conn->ep->user) {
+		res = kdbus_policy_query(&conn->ep->policy_db, conn_creds,
+					 name, hash);
+		if (res < KDBUS_POLICY_OWN)
+			return false;
+	}
+
+	if (conn->owner)
+		return true;
+
+	res = kdbus_policy_query(&conn->ep->bus->policy_db, conn_creds,
+				 name, hash);
+	return res >= KDBUS_POLICY_OWN;
+}
+
+/**
+ * kdbus_conn_policy_talk() - verify a connection can talk to a given peer
+ * @conn:		Connection that tries to talk
+ * @conn_creds:		Credentials of @conn to use for policy check
+ * @to:			Connection that is talked to
+ *
+ * This verifies that @conn is allowed to talk to @to.
+ *
+ * Return: true if allowed, false if not.
+ */
+bool kdbus_conn_policy_talk(struct kdbus_conn *conn,
+			    const struct cred *conn_creds,
+			    struct kdbus_conn *to)
+{
+	if (!conn_creds)
+		conn_creds = conn->cred;
+
+	if (conn->ep->user &&
+	    !kdbus_conn_policy_query_all(conn, conn_creds, &conn->ep->policy_db,
+					 to, KDBUS_POLICY_TALK))
+		return false;
+
+	if (conn->owner)
+		return true;
+	if (uid_eq(conn_creds->euid, to->cred->uid))
+		return true;
+
+	return kdbus_conn_policy_query_all(conn, conn_creds,
+					   &conn->ep->bus->policy_db, to,
+					   KDBUS_POLICY_TALK);
+}
+
+/**
+ * kdbus_conn_policy_see_name_unlocked() - verify a connection can see a given
+ *					   name
+ * @conn:		Connection
+ * @conn_creds:		Credentials of @conn to use for policy check
+ * @name:		Name
+ *
+ * This verifies that @conn is allowed to see the well-known name @name. Caller
+ * must hold policy-lock.
+ *
+ * Return: true if allowed, false if not.
+ */
+bool kdbus_conn_policy_see_name_unlocked(struct kdbus_conn *conn,
+					 const struct cred *conn_creds,
+					 const char *name)
+{
+	int res;
+
+	/*
+	 * By default, all names are visible on a bus. SEE policies can only be
+	 * installed on custom endpoints, where by default no name is visible.
+	 */
+	if (!conn->ep->user)
+		return true;
+
+	res = kdbus_policy_query_unlocked(&conn->ep->policy_db,
+					  conn_creds ? : conn->cred,
+					  name, kdbus_strhash(name));
+	return res >= KDBUS_POLICY_SEE;
+}
+
+static bool kdbus_conn_policy_see_name(struct kdbus_conn *conn,
+				       const struct cred *conn_creds,
+				       const char *name)
+{
+	bool res;
+
+	down_read(&conn->ep->policy_db.entries_rwlock);
+	res = kdbus_conn_policy_see_name_unlocked(conn, conn_creds, name);
+	up_read(&conn->ep->policy_db.entries_rwlock);
+
+	return res;
+}
+
+static bool kdbus_conn_policy_see(struct kdbus_conn *conn,
+				  const struct cred *conn_creds,
+				  struct kdbus_conn *whom)
+{
+	/*
+	 * By default, all names are visible on a bus, so a connection can
+	 * always see other connections. SEE policies can only be installed on
+	 * custom endpoints, where by default no name is visible and we hide
+	 * peers from each other, unless you see at least _one_ name of the
+	 * peer.
+	 */
+	return !conn->ep->user ||
+	       kdbus_conn_policy_query_all(conn, conn_creds,
+					   &conn->ep->policy_db, whom,
+					   KDBUS_POLICY_SEE);
+}
+
+/**
+ * kdbus_conn_policy_see_notification() - verify a connection is allowed to
+ *					  receive a given kernel notification
+ * @conn:		Connection
+ * @conn_creds:		Credentials of @conn to use for policy check
+ * @msg:		Notification message
+ *
+ * This checks whether @conn is allowed to see the kernel notification.
+ *
+ * Return: true if allowed, false if not.
+ */
+bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
+					const struct cred *conn_creds,
+					const struct kdbus_msg *msg)
+{
+	/*
+	 * Depending on the notification type, broadcasted kernel notifications
+	 * have to be filtered:
+	 *
+	 * KDBUS_ITEM_NAME_{ADD,REMOVE,CHANGE}: This notification is forwarded
+	 *     to a peer if, and only if, that peer can see the name this
+	 *     notification is for.
+	 *
+	 * KDBUS_ITEM_ID_{ADD,REMOVE}: Notifications for ID changes are
+	 *     broadcast to everyone, to allow tracking peers.
+	 */
+
+	switch (msg->items[0].type) {
+	case KDBUS_ITEM_NAME_ADD:
+	case KDBUS_ITEM_NAME_REMOVE:
+	case KDBUS_ITEM_NAME_CHANGE:
+		return kdbus_conn_policy_see_name(conn, conn_creds,
+					msg->items[0].name_change.name);
+
+	case KDBUS_ITEM_ID_ADD:
+	case KDBUS_ITEM_ID_REMOVE:
+		return true;
+
+	default:
+		WARN(1, "Invalid type for notification broadcast: %llu\n",
+		     (unsigned long long)msg->items[0].type);
+		return false;
+	}
+}
+
+/**
+ * kdbus_cmd_hello() - handle KDBUS_CMD_HELLO
+ * @ep:			Endpoint to operate on
+ * @file:		File this connection is opened on
+ * @argp:		Command payload
+ *
+ * Return: NULL or newly created connection on success, ERR_PTR on failure.
+ */
+struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, struct file *file,
+				   void __user *argp)
+{
+	struct kdbus_cmd_hello *cmd;
+	struct kdbus_conn *c = NULL;
+	const char *item_name;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_NAME },
+		{ .type = KDBUS_ITEM_CREDS },
+		{ .type = KDBUS_ITEM_PIDS },
+		{ .type = KDBUS_ITEM_SECLABEL },
+		{ .type = KDBUS_ITEM_CONN_DESCRIPTION },
+		{ .type = KDBUS_ITEM_POLICY_ACCESS, .multiple = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_HELLO_ACCEPT_FD |
+				 KDBUS_HELLO_ACTIVATOR |
+				 KDBUS_HELLO_POLICY_HOLDER |
+				 KDBUS_HELLO_MONITOR,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0)
+		return NULL;
+
+	item_name = argv[1].item ? argv[1].item->str : NULL;
+
+	c = kdbus_conn_new(ep, file, cmd, item_name,
+			   argv[2].item ? &argv[2].item->creds : NULL,
+			   argv[3].item ? &argv[3].item->pids : NULL,
+			   argv[4].item ? argv[4].item->str : NULL,
+			   argv[5].item ? argv[5].item->str : NULL);
+	if (IS_ERR(c)) {
+		ret = PTR_ERR(c);
+		c = NULL;
+		goto exit;
+	}
+
+	ret = kdbus_conn_connect(c, item_name);
+	if (ret < 0)
+		goto exit;
+
+	if (kdbus_conn_is_activator(c) || kdbus_conn_is_policy_holder(c)) {
+		ret = kdbus_conn_acquire(c);
+		if (ret < 0)
+			goto exit;
+
+		ret = kdbus_policy_set(&c->ep->bus->policy_db, args.items,
+				       args.items_size, 1,
+				       kdbus_conn_is_policy_holder(c), c);
+		kdbus_conn_release(c);
+		if (ret < 0)
+			goto exit;
+	}
+
+	if (copy_to_user(argp, cmd, sizeof(*cmd)))
+		ret = -EFAULT;
+
+exit:
+	ret = kdbus_args_clear(&args, ret);
+	if (ret < 0) {
+		if (c) {
+			kdbus_conn_disconnect(c, false);
+			kdbus_conn_unref(c);
+		}
+		return ERR_PTR(ret);
+	}
+	return c;
+}
+
+/**
+ * kdbus_cmd_byebye_unlocked() - handle KDBUS_CMD_BYEBYE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * The caller must not hold any active reference to @conn or this will deadlock.
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_byebye_unlocked(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_cmd *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	ret = kdbus_conn_disconnect(conn, true);
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_conn_info() - handle KDBUS_CMD_CONN_INFO
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_meta_conn *conn_meta = NULL;
+	struct kdbus_pool_slice *slice = NULL;
+	struct kdbus_name_entry *entry = NULL;
+	struct kdbus_name_owner *owner = NULL;
+	struct kdbus_conn *owner_conn = NULL;
+	struct kdbus_item *meta_items = NULL;
+	struct kdbus_info info = {};
+	struct kdbus_cmd_info *cmd;
+	struct kdbus_bus *bus = conn->ep->bus;
+	struct kvec kvec[3];
+	size_t meta_size, cnt = 0;
+	const char *name;
+	u64 attach_flags, size = 0;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_NAME },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	/* registry must be held throughout lookup *and* collecting data */
+	down_read(&bus->name_registry->rwlock);
+
+	ret = kdbus_sanitize_attach_flags(cmd->attach_flags, &attach_flags);
+	if (ret < 0)
+		goto exit;
+
+	name = argv[1].item ? argv[1].item->str : NULL;
+
+	if (name) {
+		entry = kdbus_name_lookup_unlocked(bus->name_registry, name);
+		if (entry)
+			owner = kdbus_name_get_owner(entry);
+		if (!owner ||
+		    !kdbus_conn_policy_see_name(conn, current_cred(), name) ||
+		    (cmd->id != 0 && owner->conn->id != cmd->id)) {
+			/* pretend a name doesn't exist if you cannot see it */
+			ret = -ESRCH;
+			goto exit;
+		}
+
+		owner_conn = kdbus_conn_ref(owner->conn);
+	} else if (cmd->id > 0) {
+		owner_conn = kdbus_bus_find_conn_by_id(bus, cmd->id);
+		if (!owner_conn || !kdbus_conn_policy_see(conn, current_cred(),
+							  owner_conn)) {
+			/* pretend an id doesn't exist if you cannot see it */
+			ret = -ENXIO;
+			goto exit;
+		}
+	} else {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	attach_flags &= atomic64_read(&owner_conn->attach_flags_send);
+
+	conn_meta = kdbus_meta_conn_new();
+	if (IS_ERR(conn_meta)) {
+		ret = PTR_ERR(conn_meta);
+		conn_meta = NULL;
+		goto exit;
+	}
+
+	ret = kdbus_meta_conn_collect(conn_meta, owner_conn, 0, attach_flags);
+	if (ret < 0)
+		goto exit;
+
+	ret = kdbus_meta_emit(owner_conn->meta_proc, owner_conn->meta_fake,
+			      conn_meta, conn, attach_flags,
+			      &meta_items, &meta_size);
+	if (ret < 0)
+		goto exit;
+
+	info.id = owner_conn->id;
+	info.flags = owner_conn->flags;
+
+	kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &size);
+	if (meta_size > 0) {
+		kdbus_kvec_set(&kvec[cnt++], meta_items, meta_size, &size);
+		cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+	}
+
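+	/* kvec[0] references @info, so the final size can still be filled in */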
+	info.size = size;
+
+	slice = kdbus_pool_slice_alloc(conn->pool, size, false);
+	if (IS_ERR(slice)) {
+		ret = PTR_ERR(slice);
+		slice = NULL;
+		goto exit;
+	}
+
+	ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, size);
+	if (ret < 0)
+		goto exit;
+
+	kdbus_pool_slice_publish(slice, &cmd->offset, &cmd->info_size);
+
+	if (kdbus_member_set_user(&cmd->offset, argp, typeof(*cmd), offset) ||
+	    kdbus_member_set_user(&cmd->info_size, argp,
+				  typeof(*cmd), info_size)) {
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	ret = 0;
+
+exit:
+	up_read(&bus->name_registry->rwlock);
+	kdbus_pool_slice_release(slice);
+	kfree(meta_items);
+	kdbus_meta_conn_unref(conn_meta);
+	kdbus_conn_unref(owner_conn);
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_update() - handle KDBUS_CMD_UPDATE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_update(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_item *item_policy;
+	u64 *item_attach_send = NULL;
+	u64 *item_attach_recv = NULL;
+	struct kdbus_cmd *cmd;
+	u64 attach_send;
+	u64 attach_recv;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_ATTACH_FLAGS_SEND },
+		{ .type = KDBUS_ITEM_ATTACH_FLAGS_RECV },
+		{ .type = KDBUS_ITEM_NAME, .multiple = true },
+		{ .type = KDBUS_ITEM_POLICY_ACCESS, .multiple = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	item_attach_send = argv[1].item ? &argv[1].item->data64[0] : NULL;
+	item_attach_recv = argv[2].item ? &argv[2].item->data64[0] : NULL;
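+	/* policy updates arrive as NAME and/or POLICY_ACCESS items */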
+	item_policy = argv[3].item ? : argv[4].item;
+
+	if (item_attach_send) {
+		if (!kdbus_conn_is_ordinary(conn) &&
+		    !kdbus_conn_is_monitor(conn)) {
+			ret = -EOPNOTSUPP;
+			goto exit;
+		}
+
+		ret = kdbus_sanitize_attach_flags(*item_attach_send,
+						  &attach_send);
+		if (ret < 0)
+			goto exit;
+	}
+
+	if (item_attach_recv) {
+		if (!kdbus_conn_is_ordinary(conn) &&
+		    !kdbus_conn_is_monitor(conn) &&
+		    !kdbus_conn_is_activator(conn)) {
+			ret = -EOPNOTSUPP;
+			goto exit;
+		}
+
+		ret = kdbus_sanitize_attach_flags(*item_attach_recv,
+						  &attach_recv);
+		if (ret < 0)
+			goto exit;
+	}
+
+	if (item_policy && !kdbus_conn_is_policy_holder(conn)) {
+		ret = -EOPNOTSUPP;
+		goto exit;
+	}
+
+	/* now that we verified the input, update the connection */
+
+	if (item_policy) {
+		ret = kdbus_policy_set(&conn->ep->bus->policy_db, cmd->items,
+				       KDBUS_ITEMS_SIZE(cmd, items),
+				       1, true, conn);
+		if (ret < 0)
+			goto exit;
+	}
+
+	if (item_attach_send)
+		atomic64_set(&conn->attach_flags_send, attach_send);
+
+	if (item_attach_recv)
+		atomic64_set(&conn->attach_flags_recv, attach_recv);
+
+exit:
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_send() - handle KDBUS_CMD_SEND
+ * @conn:		connection to operate on
+ * @f:			file this command was called on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
+{
+	struct kdbus_cmd_send *cmd;
+	struct kdbus_staging *staging = NULL;
+	struct kdbus_msg *msg = NULL;
+	struct file *cancel_fd = NULL;
+	int ret, ret2;
+
+	/* command arguments */
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_CANCEL_FD },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_SEND_SYNC_REPLY,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	/* message arguments */
+	struct kdbus_arg msg_argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_PAYLOAD_VEC, .multiple = true },
+		{ .type = KDBUS_ITEM_PAYLOAD_MEMFD, .multiple = true },
+		{ .type = KDBUS_ITEM_FDS },
+		{ .type = KDBUS_ITEM_BLOOM_FILTER },
+		{ .type = KDBUS_ITEM_DST_NAME },
+	};
+	struct kdbus_args msg_args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_MSG_EXPECT_REPLY |
+				 KDBUS_MSG_NO_AUTO_START |
+				 KDBUS_MSG_SIGNAL,
+		.argv = msg_argv,
+		.argc = ARRAY_SIZE(msg_argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	/* make sure to parse both @cmd and @msg during negotiation */
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret < 0)
+		goto exit;
+	else if (ret > 0 && !cmd->msg_address) /* negotiation without msg */
+		goto exit;
+
+	ret2 = kdbus_args_parse_msg(&msg_args, KDBUS_PTR(cmd->msg_address),
+				    &msg);
+	if (ret2 < 0) { /* cannot parse message */
+		ret = ret2;
+		goto exit;
+	} else if (ret2 > 0 && !ret) { /* msg-negot implies cmd-negot */
+		ret = -EINVAL;
+		goto exit;
+	} else if (ret > 0) { /* negotiation */
+		goto exit;
+	}
+
+	/* at this point both @cmd and @msg are parsed and neither wants negotiation */
+
+	cmd->reply.return_flags = 0;
+	kdbus_pool_publish_empty(conn->pool, &cmd->reply.offset,
+				 &cmd->reply.msg_size);
+
+	if (argv[1].item) {
+		cancel_fd = fget(argv[1].item->fds[0]);
+		if (!cancel_fd) {
+			ret = -EBADF;
+			goto exit;
+		}
+
+		if (!cancel_fd->f_op->poll) {
+			ret = -EINVAL;
+			goto exit;
+		}
+	}
+
+	/* patch-in the source of this message */
+	if (msg->src_id > 0 && msg->src_id != conn->id) {
+		ret = -EINVAL;
+		goto exit;
+	}
+	msg->src_id = conn->id;
+
+	staging = kdbus_staging_new_user(conn->ep->bus, cmd, msg);
+	if (IS_ERR(staging)) {
+		ret = PTR_ERR(staging);
+		staging = NULL;
+		goto exit;
+	}
+
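+	/* dispatch: broadcast, synchronous call, plain unicast, or reply */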
+	if (msg->dst_id == KDBUS_DST_ID_BROADCAST) {
+		down_read(&conn->ep->bus->name_registry->rwlock);
+		kdbus_bus_broadcast(conn->ep->bus, conn, staging);
+		up_read(&conn->ep->bus->name_registry->rwlock);
+	} else if (cmd->flags & KDBUS_SEND_SYNC_REPLY) {
+		struct kdbus_reply *r;
+		ktime_t exp;
+
+		exp = ns_to_ktime(msg->timeout_ns);
+		r = kdbus_conn_call(conn, staging, exp);
+		if (IS_ERR(r)) {
+			ret = PTR_ERR(r);
+			goto exit;
+		}
+
+		ret = kdbus_conn_wait_reply(conn, cmd, f, cancel_fd, r, exp);
+		kdbus_reply_unref(r);
+		if (ret < 0)
+			goto exit;
+	} else if ((msg->flags & KDBUS_MSG_EXPECT_REPLY) ||
+		   msg->cookie_reply == 0) {
+		ret = kdbus_conn_unicast(conn, staging);
+		if (ret < 0)
+			goto exit;
+	} else {
+		ret = kdbus_conn_reply(conn, staging);
+		if (ret < 0)
+			goto exit;
+	}
+
+	if (kdbus_member_set_user(&cmd->reply, argp, typeof(*cmd), reply))
+		ret = -EFAULT;
+
+exit:
+	if (cancel_fd)
+		fput(cancel_fd);
+	kdbus_staging_free(staging);
+	ret = kdbus_args_clear(&msg_args, ret);
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_recv() - handle KDBUS_CMD_RECV
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_recv(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_queue_entry *entry;
+	struct kdbus_cmd_recv *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_RECV_PEEK |
+				 KDBUS_RECV_DROP |
+				 KDBUS_RECV_USE_PRIORITY,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn) &&
+	    !kdbus_conn_is_monitor(conn) &&
+	    !kdbus_conn_is_activator(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	cmd->dropped_msgs = 0;
+	cmd->msg.return_flags = 0;
+	kdbus_pool_publish_empty(conn->pool, &cmd->msg.offset,
+				 &cmd->msg.msg_size);
+
+	/* DROP+priority is not reliable, so prevent it */
+	if ((cmd->flags & KDBUS_RECV_DROP) &&
+	    (cmd->flags & KDBUS_RECV_USE_PRIORITY)) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	mutex_lock(&conn->lock);
+
+	entry = kdbus_queue_peek(&conn->queue, cmd->priority,
+				 cmd->flags & KDBUS_RECV_USE_PRIORITY);
+	if (!entry) {
+		mutex_unlock(&conn->lock);
+		ret = -EAGAIN;
+	} else if (cmd->flags & KDBUS_RECV_DROP) {
+		struct kdbus_reply *reply = kdbus_reply_ref(entry->reply);
+
+		kdbus_queue_entry_free(entry);
+
+		mutex_unlock(&conn->lock);
+
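+		/*
+		 * If the dropped entry still tracks an expected reply, cancel
+		 * it: wake synchronous waiters with -EPIPE, or emit a
+		 * reply-dead notification for asynchronous requests.
+		 */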
+		if (reply) {
+			mutex_lock(&reply->reply_dst->lock);
+			if (!list_empty(&reply->entry)) {
+				kdbus_reply_unlink(reply);
+				if (reply->sync)
+					kdbus_sync_reply_wakeup(reply, -EPIPE);
+				else
+					kdbus_notify_reply_dead(conn->ep->bus,
+							reply->reply_dst->id,
+							reply->cookie);
+			}
+			mutex_unlock(&reply->reply_dst->lock);
+			kdbus_notify_flush(conn->ep->bus);
+		}
+
+		kdbus_reply_unref(reply);
+	} else {
+		bool install_fds;
+
+		/*
+		 * PEEK just returns the location of the next message. Do not
+		 * install FDs, memfds, or anything else. The only information
+		 * of interest should be the message header and metadata. Any
+		 * FD numbers in the payload are undefined for PEEK'ed
+		 * messages.
+		 * Also, never install FDs into a connection that has refused
+		 * to receive any. Ordinary connections will not get messages
+		 * with FDs queued (the receiver will get -ECOMM), but
+		 * eavesdroppers might.
+		 */
+		install_fds = (conn->flags & KDBUS_HELLO_ACCEPT_FD) &&
+			      !(cmd->flags & KDBUS_RECV_PEEK);
+
+		ret = kdbus_queue_entry_install(entry,
+						&cmd->msg.return_flags,
+						install_fds);
+		if (ret < 0) {
+			mutex_unlock(&conn->lock);
+			goto exit;
+		}
+
+		kdbus_pool_slice_publish(entry->slice, &cmd->msg.offset,
+					 &cmd->msg.msg_size);
+
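+		/* unless this was only a PEEK, the entry is consumed now */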
+		if (!(cmd->flags & KDBUS_RECV_PEEK))
+			kdbus_queue_entry_free(entry);
+
+		mutex_unlock(&conn->lock);
+	}
+
+	cmd->dropped_msgs = atomic_xchg(&conn->lost_count, 0);
+	if (cmd->dropped_msgs > 0)
+		cmd->return_flags |= KDBUS_RECV_RETURN_DROPPED_MSGS;
+
+	if (kdbus_member_set_user(&cmd->msg, argp, typeof(*cmd), msg) ||
+	    kdbus_member_set_user(&cmd->dropped_msgs, argp, typeof(*cmd),
+				  dropped_msgs))
+		ret = -EFAULT;
+
+exit:
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_free() - handle KDBUS_CMD_FREE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_free(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_cmd_free *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn) &&
+	    !kdbus_conn_is_monitor(conn) &&
+	    !kdbus_conn_is_activator(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	ret = kdbus_pool_release_offset(conn->pool, cmd->offset);
+
+	return kdbus_args_clear(&args, ret);
+}
diff -Nur linux-4.2/ipc/kdbus/connection.h linux-4.2-pck/ipc/kdbus/connection.h
--- linux-4.2/ipc/kdbus/connection.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/connection.h	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_CONNECTION_H
+#define __KDBUS_CONNECTION_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/lockdep.h>
+#include <linux/path.h>
+
+#include "limits.h"
+#include "metadata.h"
+#include "pool.h"
+#include "queue.h"
+#include "util.h"
+
+#define KDBUS_HELLO_SPECIAL_CONN	(KDBUS_HELLO_ACTIVATOR | \
+					 KDBUS_HELLO_POLICY_HOLDER | \
+					 KDBUS_HELLO_MONITOR)
+
+struct kdbus_name_entry;
+struct kdbus_quota;
+struct kdbus_staging;
+
+/**
+ * struct kdbus_conn - connection to a bus
+ * @kref:		Reference count
+ * @active:		Active references to the connection
+ * @id:			Connection ID
+ * @flags:		KDBUS_HELLO_* flags
+ * @attach_flags_send:	KDBUS_ATTACH_* flags for sending
+ * @attach_flags_recv:	KDBUS_ATTACH_* flags for receiving
+ * @description:	Human-readable connection description, used for
+ *			debugging. This field is only set when the
+ *			connection is created.
+ * @ep:			The endpoint this connection belongs to
+ * @lock:		Connection data lock
+ * @hentry:		Entry in ID <-> connection map
+ * @ep_entry:		Entry in endpoint
+ * @monitor_entry:	Entry in monitor, if the connection is a monitor
+ * @reply_list:		List of connections this connection should
+ *			reply to
+ * @work:		Delayed work to handle timeouts
+ * @match_db:		Subscription filter to broadcast messages
+ * @meta_proc:		Process metadata of connection creator, or NULL
+ * @meta_fake:		Faked metadata, or NULL
+ * @pool:		The user's buffer to receive messages
+ * @user:		Owner of the connection
+ * @cred:		The credentials of the connection at creation time
+ * @pid:		Pid at creation time
+ * @root_path:		Root path at creation time
+ * @request_count:	Number of pending requests issued by this
+ *			connection that are waiting for replies from
+ *			other peers
+ * @lost_count:		Number of lost broadcast messages
+ * @wait:		Wake up this endpoint
+ * @queue:		The message queue associated with this connection
+ * @quota:		Array of per-user quota indexed by user->id
+ * @n_quota:		Number of elements in quota array
+ * @names_list:		List of well-known names
+ * @name_count:		Number of owned well-known names
+ * @privileged:		Whether this connection is privileged on the domain
+ * @owner:		Owned by the same user as the bus owner
+ */
+struct kdbus_conn {
+	struct kref kref;
+	atomic_t active;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
+	u64 id;
+	u64 flags;
+	atomic64_t attach_flags_send;
+	atomic64_t attach_flags_recv;
+	const char *description;
+	struct kdbus_ep *ep;
+	struct mutex lock;
+	struct hlist_node hentry;
+	struct list_head ep_entry;
+	struct list_head monitor_entry;
+	struct list_head reply_list;
+	struct delayed_work work;
+	struct kdbus_match_db *match_db;
+	struct kdbus_meta_proc *meta_proc;
+	struct kdbus_meta_fake *meta_fake;
+	struct kdbus_pool *pool;
+	struct kdbus_user *user;
+	const struct cred *cred;
+	struct pid *pid;
+	struct path root_path;
+	atomic_t request_count;
+	atomic_t lost_count;
+	wait_queue_head_t wait;
+	struct kdbus_queue queue;
+
+	struct kdbus_quota *quota;
+	unsigned int n_quota;
+
+	/* protected by registry->rwlock */
+	struct list_head names_list;
+	unsigned int name_count;
+
+	bool privileged:1;
+	bool owner:1;
+};
+
+struct kdbus_conn *kdbus_conn_ref(struct kdbus_conn *conn);
+struct kdbus_conn *kdbus_conn_unref(struct kdbus_conn *conn);
+bool kdbus_conn_active(const struct kdbus_conn *conn);
+int kdbus_conn_acquire(struct kdbus_conn *conn);
+void kdbus_conn_release(struct kdbus_conn *conn);
+int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty);
+bool kdbus_conn_has_name(struct kdbus_conn *conn, const char *name);
+int kdbus_conn_quota_inc(struct kdbus_conn *c, struct kdbus_user *u,
+			 size_t memory, size_t fds);
+void kdbus_conn_quota_dec(struct kdbus_conn *c, struct kdbus_user *u,
+			  size_t memory, size_t fds);
+void kdbus_conn_lost_message(struct kdbus_conn *c);
+int kdbus_conn_entry_insert(struct kdbus_conn *conn_src,
+			    struct kdbus_conn *conn_dst,
+			    struct kdbus_staging *staging,
+			    struct kdbus_reply *reply,
+			    const struct kdbus_name_entry *name);
+void kdbus_conn_move_messages(struct kdbus_conn *conn_dst,
+			      struct kdbus_conn *conn_src,
+			      u64 name_id);
+
+/* policy */
+bool kdbus_conn_policy_own_name(struct kdbus_conn *conn,
+				const struct cred *conn_creds,
+				const char *name);
+bool kdbus_conn_policy_talk(struct kdbus_conn *conn,
+			    const struct cred *conn_creds,
+			    struct kdbus_conn *to);
+bool kdbus_conn_policy_see_name_unlocked(struct kdbus_conn *conn,
+					 const struct cred *curr_creds,
+					 const char *name);
+bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
+					const struct cred *curr_creds,
+					const struct kdbus_msg *msg);
+
+/* command dispatcher */
+struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, struct file *file,
+				   void __user *argp);
+int kdbus_cmd_byebye_unlocked(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_update(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp);
+int kdbus_cmd_recv(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_free(struct kdbus_conn *conn, void __user *argp);
+
+/**
+ * kdbus_conn_is_ordinary() - Check if connection is ordinary
+ * @conn:		The connection to check
+ *
+ * Return: Non-zero if the connection is an ordinary connection
+ */
+static inline int kdbus_conn_is_ordinary(const struct kdbus_conn *conn)
+{
+	return !(conn->flags & KDBUS_HELLO_SPECIAL_CONN);
+}
+
+/**
+ * kdbus_conn_is_activator() - Check if connection is an activator
+ * @conn:		The connection to check
+ *
+ * Return: Non-zero if the connection is an activator
+ */
+static inline int kdbus_conn_is_activator(const struct kdbus_conn *conn)
+{
+	return conn->flags & KDBUS_HELLO_ACTIVATOR;
+}
+
+/**
+ * kdbus_conn_is_policy_holder() - Check if connection is a policy holder
+ * @conn:		The connection to check
+ *
+ * Return: Non-zero if the connection is a policy holder
+ */
+static inline int kdbus_conn_is_policy_holder(const struct kdbus_conn *conn)
+{
+	return conn->flags & KDBUS_HELLO_POLICY_HOLDER;
+}
+
+/**
+ * kdbus_conn_is_monitor() - Check if connection is a monitor
+ * @conn:		The connection to check
+ *
+ * Return: Non-zero if the connection is a monitor
+ */
+static inline int kdbus_conn_is_monitor(const struct kdbus_conn *conn)
+{
+	return conn->flags & KDBUS_HELLO_MONITOR;
+}
+
+/**
+ * kdbus_conn_lock2() - Lock two connections
+ * @a:		connection A to lock or NULL
+ * @b:		connection B to lock or NULL
+ *
+ * Lock two connections at once. As we need to have a stable locking order, we
+ * always lock the connection with lower memory address first.
+ */
+static inline void kdbus_conn_lock2(struct kdbus_conn *a, struct kdbus_conn *b)
+{
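+	/* with two connections, the second lock nests with lockdep subclass 1 */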
+	if (a < b) {
+		if (a)
+			mutex_lock(&a->lock);
+		if (b && b != a)
+			mutex_lock_nested(&b->lock, !!a);
+	} else {
+		if (b)
+			mutex_lock(&b->lock);
+		if (a && a != b)
+			mutex_lock_nested(&a->lock, !!b);
+	}
+}
+
+/**
+ * kdbus_conn_unlock2() - Unlock two connections
+ * @a:		connection A to unlock or NULL
+ * @b:		connection B to unlock or NULL
+ *
+ * Unlock two connections at once. See kdbus_conn_lock2().
+ */
+static inline void kdbus_conn_unlock2(struct kdbus_conn *a,
+				      struct kdbus_conn *b)
+{
+	if (a)
+		mutex_unlock(&a->lock);
+	if (b && b != a)
+		mutex_unlock(&b->lock);
+}
+
+/**
+ * kdbus_conn_assert_active() - lockdep assert on active lock
+ * @conn:	connection that shall be active
+ *
+ * This verifies via lockdep that the caller holds an active reference to the
+ * given connection.
+ */
+static inline void kdbus_conn_assert_active(struct kdbus_conn *conn)
+{
+	lockdep_assert_held(conn);
+}
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/domain.c linux-4.2-pck/ipc/kdbus/domain.c
--- linux-4.2/ipc/kdbus/domain.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/domain.c	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "bus.h"
+#include "domain.h"
+#include "handle.h"
+#include "item.h"
+#include "limits.h"
+#include "util.h"
+
+static void kdbus_domain_control_free(struct kdbus_node *node)
+{
+	kfree(node);
+}
+
+static struct kdbus_node *kdbus_domain_control_new(struct kdbus_domain *domain,
+						   unsigned int access)
+{
+	struct kdbus_node *node;
+	int ret;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	kdbus_node_init(node, KDBUS_NODE_CONTROL);
+
+	node->free_cb = kdbus_domain_control_free;
+	/* owner read/write; group/world bits are added below based on @access */
+	node->mode = S_IRUSR | S_IWUSR;
+	if (access & (KDBUS_MAKE_ACCESS_GROUP | KDBUS_MAKE_ACCESS_WORLD))
+		node->mode |= S_IRGRP | S_IWGRP;
+	if (access & KDBUS_MAKE_ACCESS_WORLD)
+		node->mode |= S_IROTH | S_IWOTH;
+
+	ret = kdbus_node_link(node, &domain->node, "control");
+	if (ret < 0)
+		goto exit_free;
+
+	return node;
+
+exit_free:
+	kdbus_node_drain(node);
+	kdbus_node_unref(node);
+	return ERR_PTR(ret);
+}
+
+static void kdbus_domain_free(struct kdbus_node *node)
+{
+	struct kdbus_domain *domain =
+		container_of(node, struct kdbus_domain, node);
+
+	put_user_ns(domain->user_namespace);
+	ida_destroy(&domain->user_ida);
+	idr_destroy(&domain->user_idr);
+	kfree(domain);
+}
+
+/**
+ * kdbus_domain_new() - create a new domain
+ * @access:		The access mode for this node (KDBUS_MAKE_ACCESS_*)
+ *
+ * Return: a new kdbus_domain on success, ERR_PTR on failure
+ */
+struct kdbus_domain *kdbus_domain_new(unsigned int access)
+{
+	struct kdbus_domain *d;
+	int ret;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	kdbus_node_init(&d->node, KDBUS_NODE_DOMAIN);
+
+	d->node.free_cb = kdbus_domain_free;
+	d->node.mode = S_IRUSR | S_IXUSR;
+	if (access & (KDBUS_MAKE_ACCESS_GROUP | KDBUS_MAKE_ACCESS_WORLD))
+		d->node.mode |= S_IRGRP | S_IXGRP;
+	if (access & KDBUS_MAKE_ACCESS_WORLD)
+		d->node.mode |= S_IROTH | S_IXOTH;
+
+	mutex_init(&d->lock);
+	idr_init(&d->user_idr);
+	ida_init(&d->user_ida);
+
+	/* Pin the user namespace so we can guarantee domain-unique bus names. */
+	d->user_namespace = get_user_ns(current_user_ns());
+
+	ret = kdbus_node_link(&d->node, NULL, NULL);
+	if (ret < 0)
+		goto exit_unref;
+
+	return d;
+
+exit_unref:
+	kdbus_node_drain(&d->node);
+	kdbus_node_unref(&d->node);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_domain_ref() - take a domain reference
+ * @domain:		Domain
+ *
+ * Return: the domain itself
+ */
+struct kdbus_domain *kdbus_domain_ref(struct kdbus_domain *domain)
+{
+	if (domain)
+		kdbus_node_ref(&domain->node);
+	return domain;
+}
+
+/**
+ * kdbus_domain_unref() - drop a domain reference
+ * @domain:		Domain
+ *
+ * When the last reference is dropped, the domain internal structure
+ * is freed.
+ *
+ * Return: NULL
+ */
+struct kdbus_domain *kdbus_domain_unref(struct kdbus_domain *domain)
+{
+	if (domain)
+		kdbus_node_unref(&domain->node);
+	return NULL;
+}
+
+/**
+ * kdbus_domain_populate() - populate static domain nodes
+ * @domain:	domain to populate
+ * @access:	KDBUS_MAKE_ACCESS_* access restrictions for new nodes
+ *
+ * Allocate and activate static sub-nodes of the given domain. This will fail if
+ * you call it on a non-active node or if the domain was already populated.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_domain_populate(struct kdbus_domain *domain, unsigned int access)
+{
+	struct kdbus_node *control;
+
+	/*
+	 * Create a control-node for this domain. We drop our own reference
+	 * immediately, effectively causing the node to be deactivated and
+	 * released when the parent domain is.
+	 */
+	control = kdbus_domain_control_new(domain, access);
+	if (IS_ERR(control))
+		return PTR_ERR(control);
+
+	kdbus_node_activate(control);
+	kdbus_node_unref(control);
+	return 0;
+}
+
+/**
+ * kdbus_user_lookup() - lookup a kdbus_user object
+ * @domain:		domain of the user
+ * @uid:		uid of the user; INVALID_UID for an anon user
+ *
+ * Look up the kdbus user accounting object for the given domain. If INVALID_UID
+ * is passed, a new anonymous user is created which is private to the caller.
+ *
+ * Return: The user object on success, ERR_PTR on failure.
+ */
+struct kdbus_user *kdbus_user_lookup(struct kdbus_domain *domain, kuid_t uid)
+{
+	struct kdbus_user *u = NULL, *old = NULL;
+	int ret;
+
+	mutex_lock(&domain->lock);
+
+	if (uid_valid(uid)) {
+		old = idr_find(&domain->user_idr, __kuid_val(uid));
+		/*
+		 * If the object is about to be destroyed, ignore it and
+		 * replace the slot in the IDR later on.
+		 */
+		if (old && kref_get_unless_zero(&old->kref)) {
+			mutex_unlock(&domain->lock);
+			return old;
+		}
+	}
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (!u) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	kref_init(&u->kref);
+	u->domain = kdbus_domain_ref(domain);
+	u->uid = uid;
+	atomic_set(&u->buses, 0);
+	atomic_set(&u->connections, 0);
+
+	if (uid_valid(uid)) {
+		if (old) {
+			idr_replace(&domain->user_idr, u, __kuid_val(uid));
+			old->uid = INVALID_UID; /* mark old as removed */
+		} else {
+			ret = idr_alloc(&domain->user_idr, u, __kuid_val(uid),
+					__kuid_val(uid) + 1, GFP_KERNEL);
+			if (ret < 0)
+				goto exit;
+		}
+	}
+
+	/*
+	 * Allocate the smallest possible index for this user; used
+	 * in arrays for accounting user quota in receiver queues.
+	 */
+	ret = ida_simple_get(&domain->user_ida, 1, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto exit;
+
+	u->id = ret;
+	mutex_unlock(&domain->lock);
+	return u;
+
+exit:
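+	/* on failure, undo the IDR registration performed above */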
+	if (u) {
+		if (uid_valid(u->uid))
+			idr_remove(&domain->user_idr, __kuid_val(u->uid));
+		kdbus_domain_unref(u->domain);
+		kfree(u);
+	}
+	mutex_unlock(&domain->lock);
+	return ERR_PTR(ret);
+}
+
+static void __kdbus_user_free(struct kref *kref)
+{
+	struct kdbus_user *user = container_of(kref, struct kdbus_user, kref);
+
+	WARN_ON(atomic_read(&user->buses) > 0);
+	WARN_ON(atomic_read(&user->connections) > 0);
+
+	mutex_lock(&user->domain->lock);
+	ida_simple_remove(&user->domain->user_ida, user->id);
+	if (uid_valid(user->uid))
+		idr_remove(&user->domain->user_idr, __kuid_val(user->uid));
+	mutex_unlock(&user->domain->lock);
+
+	kdbus_domain_unref(user->domain);
+	kfree(user);
+}
+
+/**
+ * kdbus_user_ref() - take a user reference
+ * @u:		User
+ *
+ * Return: @u is returned
+ */
+struct kdbus_user *kdbus_user_ref(struct kdbus_user *u)
+{
+	if (u)
+		kref_get(&u->kref);
+	return u;
+}
+
+/**
+ * kdbus_user_unref() - drop a user reference
+ * @u:		User
+ *
+ * Return: NULL
+ */
+struct kdbus_user *kdbus_user_unref(struct kdbus_user *u)
+{
+	if (u)
+		kref_put(&u->kref, __kdbus_user_free);
+	return NULL;
+}
diff -Nur linux-4.2/ipc/kdbus/domain.h linux-4.2-pck/ipc/kdbus/domain.h
--- linux-4.2/ipc/kdbus/domain.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/domain.h	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_DOMAIN_H
+#define __KDBUS_DOMAIN_H
+
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/kref.h>
+#include <linux/user_namespace.h>
+
+#include "node.h"
+
+/**
+ * struct kdbus_domain - domain for buses
+ * @node:		Underlying API node
+ * @lock:		Domain data lock
+ * @last_id:		Last used object id
+ * @user_idr:		Set of all users indexed by UID
+ * @user_ida:		Set of all users to compute small indices
+ * @user_namespace:	User namespace, pinned at creation time
+ * @dentry:		Root dentry of VFS mount (don't use outside of kdbusfs)
+ */
+struct kdbus_domain {
+	struct kdbus_node node;
+	struct mutex lock;
+	atomic64_t last_id;
+	struct idr user_idr;
+	struct ida user_ida;
+	struct user_namespace *user_namespace;
+	struct dentry *dentry;
+};
+
+/**
+ * struct kdbus_user - resource accounting for users
+ * @kref:		Reference counter
+ * @domain:		Domain of the user
+ * @id:			Index of this user
+ * @uid:		UID of the user
+ * @buses:		Number of buses the user has created
+ * @connections:	Number of connections the user has created
+ */
+struct kdbus_user {
+	struct kref kref;
+	struct kdbus_domain *domain;
+	unsigned int id;
+	kuid_t uid;
+	atomic_t buses;
+	atomic_t connections;
+};
+
+#define kdbus_domain_from_node(_node) \
+	container_of((_node), struct kdbus_domain, node)
+
+struct kdbus_domain *kdbus_domain_new(unsigned int access);
+struct kdbus_domain *kdbus_domain_ref(struct kdbus_domain *domain);
+struct kdbus_domain *kdbus_domain_unref(struct kdbus_domain *domain);
+int kdbus_domain_populate(struct kdbus_domain *domain, unsigned int access);
+
+#define KDBUS_USER_KERNEL_ID 0 /* ID 0 is reserved for kernel accounting */
+
+struct kdbus_user *kdbus_user_lookup(struct kdbus_domain *domain, kuid_t uid);
+struct kdbus_user *kdbus_user_ref(struct kdbus_user *u);
+struct kdbus_user *kdbus_user_unref(struct kdbus_user *u);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/endpoint.c linux-4.2-pck/ipc/kdbus/endpoint.c
--- linux-4.2/ipc/kdbus/endpoint.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/endpoint.c	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "item.h"
+#include "message.h"
+#include "policy.h"
+
+static void kdbus_ep_free(struct kdbus_node *node)
+{
+	struct kdbus_ep *ep = container_of(node, struct kdbus_ep, node);
+
+	WARN_ON(!list_empty(&ep->conn_list));
+
+	kdbus_policy_db_clear(&ep->policy_db);
+	kdbus_bus_unref(ep->bus);
+	kdbus_user_unref(ep->user);
+	kfree(ep);
+}
+
+static void kdbus_ep_release(struct kdbus_node *node, bool was_active)
+{
+	struct kdbus_ep *ep = container_of(node, struct kdbus_ep, node);
+
+	/* disconnect all connections to this endpoint */
+	for (;;) {
+		struct kdbus_conn *conn;
+
+		mutex_lock(&ep->lock);
+		conn = list_first_entry_or_null(&ep->conn_list,
+						struct kdbus_conn,
+						ep_entry);
+		if (!conn) {
+			mutex_unlock(&ep->lock);
+			break;
+		}
+
+		/* take reference, release lock, disconnect without lock */
+		kdbus_conn_ref(conn);
+		mutex_unlock(&ep->lock);
+
+		kdbus_conn_disconnect(conn, false);
+		kdbus_conn_unref(conn);
+	}
+}
+
+/**
+ * kdbus_ep_new() - create a new endpoint
+ * @bus:		The bus this endpoint will be created for
+ * @name:		The name of the endpoint
+ * @access:		The access flags for this node (KDBUS_MAKE_ACCESS_*)
+ * @uid:		The uid of the node
+ * @gid:		The gid of the node
+ * @is_custom:		Whether this is a custom endpoint
+ *
+ * This function will create a new endpoint with the given
+ * name and properties for a given bus.
+ *
+ * Return: a new kdbus_ep on success, ERR_PTR on failure.
+ */
+struct kdbus_ep *kdbus_ep_new(struct kdbus_bus *bus, const char *name,
+			      unsigned int access, kuid_t uid, kgid_t gid,
+			      bool is_custom)
+{
+	struct kdbus_ep *e;
+	int ret;
+
+	/*
+	 * Validate only custom endpoint names; the default endpoint
+	 * named "bus" is created together with the bus itself.
+	 */
+	if (is_custom) {
+		ret = kdbus_verify_uid_prefix(name, bus->domain->user_namespace,
+					      uid);
+		if (ret < 0)
+			return ERR_PTR(ret);
+	}
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	kdbus_node_init(&e->node, KDBUS_NODE_ENDPOINT);
+
+	e->node.free_cb = kdbus_ep_free;
+	e->node.release_cb = kdbus_ep_release;
+	e->node.uid = uid;
+	e->node.gid = gid;
+	e->node.mode = S_IRUSR | S_IWUSR;
+	if (access & (KDBUS_MAKE_ACCESS_GROUP | KDBUS_MAKE_ACCESS_WORLD))
+		e->node.mode |= S_IRGRP | S_IWGRP;
+	if (access & KDBUS_MAKE_ACCESS_WORLD)
+		e->node.mode |= S_IROTH | S_IWOTH;
+
+	mutex_init(&e->lock);
+	INIT_LIST_HEAD(&e->conn_list);
+	kdbus_policy_db_init(&e->policy_db);
+	e->bus = kdbus_bus_ref(bus);
+
+	ret = kdbus_node_link(&e->node, &bus->node, name);
+	if (ret < 0)
+		goto exit_unref;
+
+	/*
+	 * Transactions on custom endpoints are never accounted on the global
+	 * user limits. Instead, for each custom endpoint, we create a custom,
+	 * unique user, which all transactions are accounted on. Regardless of
+	 * the user using that endpoint, it is always accounted on the same
+	 * user-object. This budget is not shared with ordinary users on
+	 * non-custom endpoints.
+	 */
+	if (is_custom) {
+		e->user = kdbus_user_lookup(bus->domain, INVALID_UID);
+		if (IS_ERR(e->user)) {
+			ret = PTR_ERR(e->user);
+			e->user = NULL;
+			goto exit_unref;
+		}
+	}
+
+	return e;
+
+exit_unref:
+	kdbus_node_drain(&e->node);
+	kdbus_node_unref(&e->node);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_ep_ref() - increase the reference counter of a kdbus_ep
+ * @ep:			The endpoint to reference
+ *
+ * Every user of an endpoint, except for its creator, must add a reference to
+ * the kdbus_ep instance using this function.
+ *
+ * Return: the ep itself
+ */
+struct kdbus_ep *kdbus_ep_ref(struct kdbus_ep *ep)
+{
+	if (ep)
+		kdbus_node_ref(&ep->node);
+	return ep;
+}
+
+/**
+ * kdbus_ep_unref() - decrease the reference counter of a kdbus_ep
+ * @ep:		The ep to unref
+ *
+ * Release a reference. If the reference count drops to 0, the ep will be
+ * freed.
+ *
+ * Return: NULL
+ */
+struct kdbus_ep *kdbus_ep_unref(struct kdbus_ep *ep)
+{
+	if (ep)
+		kdbus_node_unref(&ep->node);
+	return NULL;
+}
+
+/**
+ * kdbus_ep_is_privileged() - check whether a file is privileged
+ * @ep:		endpoint to operate on
+ * @file:	file to test
+ *
+ * Return: True if @file is privileged in the domain of @ep.
+ */
+bool kdbus_ep_is_privileged(struct kdbus_ep *ep, struct file *file)
+{
+	return !ep->user &&
+		file_ns_capable(file, ep->bus->domain->user_namespace,
+				CAP_IPC_OWNER);
+}
+
+/**
+ * kdbus_ep_is_owner() - check whether a file should be treated as bus owner
+ * @ep:		endpoint to operate on
+ * @file:	file to test
+ *
+ * Return: True if @file should be treated as bus owner on @ep
+ */
+bool kdbus_ep_is_owner(struct kdbus_ep *ep, struct file *file)
+{
+	return !ep->user &&
+		(uid_eq(file->f_cred->euid, ep->bus->node.uid) ||
+		 kdbus_ep_is_privileged(ep, file));
+}
+
+/**
+ * kdbus_cmd_ep_make() - handle KDBUS_CMD_ENDPOINT_MAKE
+ * @bus:		bus to operate on
+ * @argp:		command payload
+ *
+ * Return: NULL or newly created endpoint on success, ERR_PTR on failure.
+ */
+struct kdbus_ep *kdbus_cmd_ep_make(struct kdbus_bus *bus, void __user *argp)
+{
+	const char *item_make_name;
+	struct kdbus_ep *ep = NULL;
+	struct kdbus_cmd *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_MAKE_NAME, .mandatory = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_MAKE_ACCESS_GROUP |
+				 KDBUS_MAKE_ACCESS_WORLD,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0)
+		return NULL;
+
+	item_make_name = argv[1].item->str;
+
+	ep = kdbus_ep_new(bus, item_make_name, cmd->flags,
+			  current_euid(), current_egid(), true);
+	if (IS_ERR(ep)) {
+		ret = PTR_ERR(ep);
+		ep = NULL;
+		goto exit;
+	}
+
+	if (!kdbus_node_activate(&ep->node)) {
+		ret = -ESHUTDOWN;
+		goto exit;
+	}
+
+exit:
+	ret = kdbus_args_clear(&args, ret);
+	if (ret < 0) {
+		if (ep) {
+			kdbus_node_drain(&ep->node);
+			kdbus_ep_unref(ep);
+		}
+		return ERR_PTR(ret);
+	}
+	return ep;
+}
+
+/**
+ * kdbus_cmd_ep_update() - handle KDBUS_CMD_ENDPOINT_UPDATE
+ * @ep:			endpoint to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_ep_update(struct kdbus_ep *ep, void __user *argp)
+{
+	struct kdbus_cmd *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_NAME, .multiple = true },
+		{ .type = KDBUS_ITEM_POLICY_ACCESS, .multiple = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	ret = kdbus_policy_set(&ep->policy_db, args.items, args.items_size,
+			       0, true, ep);
+	return kdbus_args_clear(&args, ret);
+}
diff -Nur linux-4.2/ipc/kdbus/endpoint.h linux-4.2-pck/ipc/kdbus/endpoint.h
--- linux-4.2/ipc/kdbus/endpoint.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/endpoint.h	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_ENDPOINT_H
+#define __KDBUS_ENDPOINT_H
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/uidgid.h>
+#include "node.h"
+#include "policy.h"
+
+struct kdbus_bus;
+struct kdbus_user;
+
+/**
+ * struct kdbus_ep - endpoint to access a bus
+ * @node:		The kdbus node
+ * @lock:		Endpoint data lock
+ * @bus:		Bus behind this endpoint
+ * @user:		Custom endpoints account against an anonymous user
+ * @policy_db:		Uploaded policy
+ * @conn_list:		Connections of this endpoint
+ *
+ * An endpoint offers access to a bus; the default endpoint node name is "bus".
+ * Additional custom endpoints to the same bus can be created and they can
+ * carry their own policies/filters.
+ */
+struct kdbus_ep {
+	struct kdbus_node node;
+	struct mutex lock;
+
+	/* static */
+	struct kdbus_bus *bus;
+	struct kdbus_user *user;
+
+	/* protected by own locks */
+	struct kdbus_policy_db policy_db;
+
+	/* protected by ep->lock */
+	struct list_head conn_list;
+};
+
+#define kdbus_ep_from_node(_node) \
+	container_of((_node), struct kdbus_ep, node)
+
+struct kdbus_ep *kdbus_ep_new(struct kdbus_bus *bus, const char *name,
+			      unsigned int access, kuid_t uid, kgid_t gid,
+			      bool policy);
+struct kdbus_ep *kdbus_ep_ref(struct kdbus_ep *ep);
+struct kdbus_ep *kdbus_ep_unref(struct kdbus_ep *ep);
+
+bool kdbus_ep_is_privileged(struct kdbus_ep *ep, struct file *file);
+bool kdbus_ep_is_owner(struct kdbus_ep *ep, struct file *file);
+
+struct kdbus_ep *kdbus_cmd_ep_make(struct kdbus_bus *bus, void __user *argp);
+int kdbus_cmd_ep_update(struct kdbus_ep *ep, void __user *argp);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/fs.c linux-4.2-pck/ipc/kdbus/fs.c
--- linux-4.2/ipc/kdbus/fs.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/fs.c	2015-09-08 00:48:22.231697056 -0300
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/init.h>
+#include <linux/ipc_namespace.h>
+#include <linux/magic.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "bus.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "fs.h"
+#include "handle.h"
+#include "node.h"
+
+#define kdbus_node_from_dentry(_dentry) \
+	((struct kdbus_node *)(_dentry)->d_fsdata)
+
+static struct inode *fs_inode_get(struct super_block *sb,
+				  struct kdbus_node *node);
+
+/*
+ * Directory Management
+ */
+
+static inline unsigned char kdbus_dt_type(struct kdbus_node *node)
+{
+	switch (node->type) {
+	case KDBUS_NODE_DOMAIN:
+	case KDBUS_NODE_BUS:
+		return DT_DIR;
+	case KDBUS_NODE_CONTROL:
+	case KDBUS_NODE_ENDPOINT:
+		return DT_REG;
+	}
+
+	return DT_UNKNOWN;
+}
+
+static int fs_dir_fop_iterate(struct file *file, struct dir_context *ctx)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct kdbus_node *parent = kdbus_node_from_dentry(dentry);
+	struct kdbus_node *old, *next = file->private_data;
+
+	/*
+	 * kdbusfs directory iterator (modelled after sysfs/kernfs)
+	 * When iterating kdbusfs directories, we iterate all children of the
+	 * parent kdbus_node object. We use ctx->pos to store the hash of the
+	 * child and file->private_data to store a reference to the next node
+	 * object. If ctx->pos is not modified via llseek while you iterate a
+	 * directory, then we use the file->private_data node pointer to
+	 * directly access the next node in the tree.
+	 * However, if you directly seek on the directory, we have to find the
+	 * closest node to that position and cannot use our node pointer. This
+	 * means iterating the rb-tree to find the closest match and start over
+	 * from there.
+	 * Note that hash values are not necessarily unique. Therefore, llseek
+	 * is not guaranteed to seek to the same node that you got when you
+	 * retrieved the position. Seeking to 0, 1, 2 and >=INT_MAX is safe,
+	 * though. We could use the inode-number as position, but this would
+	 * require another rb-tree for fast access. Kernfs and others already
+	 * ignore those conflicts, so we should be fine, too.
+	 */
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	/* acquire @next; if deactivated, or seek detected, find next node */
+	old = next;
+	if (next && ctx->pos == next->hash) {
+		if (kdbus_node_acquire(next))
+			kdbus_node_ref(next);
+		else
+			next = kdbus_node_next_child(parent, next);
+	} else {
+		next = kdbus_node_find_closest(parent, ctx->pos);
+	}
+	kdbus_node_unref(old);
+
+	while (next) {
+		/* emit @next */
+		file->private_data = next;
+		ctx->pos = next->hash;
+
+		kdbus_node_release(next);
+
+		if (!dir_emit(ctx, next->name, strlen(next->name), next->id,
+			      kdbus_dt_type(next)))
+			return 0;
+
+		/* find next node after @next */
+		old = next;
+		next = kdbus_node_next_child(parent, next);
+		kdbus_node_unref(old);
+	}
+
+	file->private_data = NULL;
+	ctx->pos = INT_MAX;
+
+	return 0;
+}
+
+static loff_t fs_dir_fop_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(file);
+	loff_t ret;
+
+	/* protect f_off against fop_iterate */
+	mutex_lock(&inode->i_mutex);
+	ret = generic_file_llseek(file, offset, whence);
+	mutex_unlock(&inode->i_mutex);
+
+	return ret;
+}
+
+static int fs_dir_fop_release(struct inode *inode, struct file *file)
+{
+	kdbus_node_unref(file->private_data);
+	return 0;
+}
+
+static const struct file_operations fs_dir_fops = {
+	.read		= generic_read_dir,
+	.iterate	= fs_dir_fop_iterate,
+	.llseek		= fs_dir_fop_llseek,
+	.release	= fs_dir_fop_release,
+};
+
+static struct dentry *fs_dir_iop_lookup(struct inode *dir,
+					struct dentry *dentry,
+					unsigned int flags)
+{
+	struct dentry *dnew = NULL;
+	struct kdbus_node *parent;
+	struct kdbus_node *node;
+	struct inode *inode;
+
+	parent = kdbus_node_from_dentry(dentry->d_parent);
+	if (!kdbus_node_acquire(parent))
+		return NULL;
+
+	/* returns reference to _acquired_ child node */
+	node = kdbus_node_find_child(parent, dentry->d_name.name);
+	if (node) {
+		dentry->d_fsdata = node;
+		inode = fs_inode_get(dir->i_sb, node);
+		if (IS_ERR(inode))
+			dnew = ERR_CAST(inode);
+		else
+			dnew = d_splice_alias(inode, dentry);
+
+		kdbus_node_release(node);
+	}
+
+	kdbus_node_release(parent);
+	return dnew;
+}
+
+static const struct inode_operations fs_dir_iops = {
+	.permission	= generic_permission,
+	.lookup		= fs_dir_iop_lookup,
+};
+
+/*
+ * Inode Management
+ */
+
+static const struct inode_operations fs_inode_iops = {
+	.permission	= generic_permission,
+};
+
+static struct inode *fs_inode_get(struct super_block *sb,
+				  struct kdbus_node *node)
+{
+	struct inode *inode;
+
+	inode = iget_locked(sb, node->id);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
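+	/* iget_locked() may return an already-initialized, cached inode */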
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	inode->i_private = kdbus_node_ref(node);
+	inode->i_mapping->a_ops = &empty_aops;
+	inode->i_mode = node->mode & S_IALLUGO;
+	inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	inode->i_uid = node->uid;
+	inode->i_gid = node->gid;
+
+	switch (node->type) {
+	case KDBUS_NODE_DOMAIN:
+	case KDBUS_NODE_BUS:
+		inode->i_mode |= S_IFDIR;
+		inode->i_op = &fs_dir_iops;
+		inode->i_fop = &fs_dir_fops;
+		set_nlink(inode, 2);
+		break;
+	case KDBUS_NODE_CONTROL:
+	case KDBUS_NODE_ENDPOINT:
+		inode->i_mode |= S_IFREG;
+		inode->i_op = &fs_inode_iops;
+		inode->i_fop = &kdbus_handle_ops;
+		break;
+	}
+
+	unlock_new_inode(inode);
+
+	return inode;
+}
+
+/*
+ * Superblock Management
+ */
+
+static int fs_super_dop_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct kdbus_node *node;
+
+	/* Force lookup on negatives */
+	if (!dentry->d_inode)
+		return 0;
+
+	node = kdbus_node_from_dentry(dentry);
+
+	/* see whether the node has been removed */
+	if (!kdbus_node_is_active(node))
+		return 0;
+
+	return 1;
+}
+
+static void fs_super_dop_release(struct dentry *dentry)
+{
+	kdbus_node_unref(dentry->d_fsdata);
+}
+
+static const struct dentry_operations fs_super_dops = {
+	.d_revalidate	= fs_super_dop_revalidate,
+	.d_release	= fs_super_dop_release,
+};
+
+static void fs_super_sop_evict_inode(struct inode *inode)
+{
+	struct kdbus_node *node = kdbus_node_from_inode(inode);
+
+	truncate_inode_pages_final(&inode->i_data);
+	clear_inode(inode);
+	kdbus_node_unref(node);
+}
+
+static const struct super_operations fs_super_sops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+	.evict_inode	= fs_super_sop_evict_inode,
+};
+
+static int fs_super_fill(struct super_block *sb)
+{
+	struct kdbus_domain *domain = sb->s_fs_info;
+	struct inode *inode;
+	int ret;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = KDBUS_SUPER_MAGIC;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_op = &fs_super_sops;
+	sb->s_time_gran = 1;
+
+	inode = fs_inode_get(sb, &domain->node);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		/* d_make_root iput()s the inode on failure */
+		return -ENOMEM;
+	}
+
+	/* sb holds domain reference */
+	sb->s_root->d_fsdata = &domain->node;
+	sb->s_d_op = &fs_super_dops;
+
+	/* sb holds root reference */
+	domain->dentry = sb->s_root;
+
+	if (!kdbus_node_activate(&domain->node))
+		return -ESHUTDOWN;
+
+	ret = kdbus_domain_populate(domain, KDBUS_MAKE_ACCESS_WORLD);
+	if (ret < 0)
+		return ret;
+
+	sb->s_flags |= MS_ACTIVE;
+	return 0;
+}
+
+static void fs_super_kill(struct super_block *sb)
+{
+	struct kdbus_domain *domain = sb->s_fs_info;
+
+	if (domain) {
+		kdbus_node_drain(&domain->node);
+		domain->dentry = NULL;
+	}
+
+	kill_anon_super(sb);
+	kdbus_domain_unref(domain);
+}
+
+static int fs_super_set(struct super_block *sb, void *data)
+{
+	int ret;
+
+	ret = set_anon_super(sb, data);
+	if (!ret)
+		sb->s_fs_info = data;
+
+	return ret;
+}
+
+static struct dentry *fs_super_mount(struct file_system_type *fs_type,
+				     int flags, const char *dev_name,
+				     void *data)
+{
+	struct kdbus_domain *domain;
+	struct super_block *sb;
+	int ret;
+
+	domain = kdbus_domain_new(KDBUS_MAKE_ACCESS_WORLD);
+	if (IS_ERR(domain))
+		return ERR_CAST(domain);
+
+	sb = sget(fs_type, NULL, fs_super_set, flags, domain);
+	if (IS_ERR(sb)) {
+		kdbus_node_drain(&domain->node);
+		kdbus_domain_unref(domain);
+		return ERR_CAST(sb);
+	}
+
+	WARN_ON(sb->s_fs_info != domain);
+	WARN_ON(sb->s_root);
+
+	ret = fs_super_fill(sb);
+	if (ret < 0) {
+		/* calls into ->kill_sb() when done */
+		deactivate_locked_super(sb);
+		return ERR_PTR(ret);
+	}
+
+	return dget(sb->s_root);
+}
+
+static struct file_system_type fs_type = {
+	.name		= KBUILD_MODNAME "fs",
+	.owner		= THIS_MODULE,
+	.mount		= fs_super_mount,
+	.kill_sb	= fs_super_kill,
+	.fs_flags	= FS_USERNS_MOUNT,
+};
+
+/**
+ * kdbus_fs_init() - register kdbus filesystem
+ *
+ * This registers a filesystem with the VFS layer. The filesystem is called
+ * `KBUILD_MODNAME "fs"', which usually resolves to `kdbusfs'. This naming
+ * scheme allows setting KBUILD_MODNAME to "kdbus2" to get an independent
+ * filesystem for development purposes.
+ *
+ * Each mount of the kdbusfs filesystem has a kdbus_domain attached.
+ * Operations on this mount will only affect the attached domain. On each mount
+ * a new domain is automatically created and used for this mount exclusively.
+ * If you want to share a domain across multiple mounts, you need to bind-mount
+ * it.
+ *
+ * Mounts of kdbusfs (with a different domain each) are unrelated to each other
+ * and will never have any effect on any domain but their own.
+ *
+ * Return: 0 on success, negative error otherwise.
+ */
+int kdbus_fs_init(void)
+{
+	return register_filesystem(&fs_type);
+}
+
+/**
+ * kdbus_fs_exit() - unregister kdbus filesystem
+ *
+ * This does the reverse to kdbus_fs_init(). It unregisters the kdbusfs
+ * filesystem from VFS and cleans up any allocated resources.
+ */
+void kdbus_fs_exit(void)
+{
+	unregister_filesystem(&fs_type);
+}
+
+/* acquire domain of @node, making sure all ancestors are active */
+static struct kdbus_domain *fs_acquire_domain(struct kdbus_node *node)
+{
+	struct kdbus_domain *domain;
+	struct kdbus_node *iter;
+
+	/* caller must guarantee that @node is linked */
+	for (iter = node; iter->parent; iter = iter->parent)
+		if (!kdbus_node_is_active(iter->parent))
+			return NULL;
+
+	/* root nodes are always domains */
+	if (WARN_ON(iter->type != KDBUS_NODE_DOMAIN))
+		return NULL;
+
+	domain = kdbus_domain_from_node(iter);
+	if (!kdbus_node_acquire(&domain->node))
+		return NULL;
+
+	return domain;
+}
+
+/**
+ * kdbus_fs_flush() - flush dcache entries of a node
+ * @node:		Node to flush entries of
+ *
+ * This flushes all VFS filesystem cache entries for a node and all its
+ * children. This should be called whenever a node is destroyed during
+ * runtime. It will flush the cache entries so the linked objects can be
+ * deallocated.
+ *
+ * This is a no-op if you call it on active nodes (they really should stay in
+ * cache) or on nodes with deactivated parents (flushing the parent is enough).
+ * Furthermore, there is no need to call it on nodes whose lifetime is bound to
+ * their parents'. In those cases, the parent-flush will always also flush the
+ * children.
+ */
+void kdbus_fs_flush(struct kdbus_node *node)
+{
+	struct dentry *dentry, *parent_dentry = NULL;
+	struct kdbus_domain *domain;
+	struct qstr name;
+
+	/* active nodes should remain in cache */
+	if (!kdbus_node_is_deactivated(node))
+		return;
+
+	/* nodes that were never linked were never instantiated */
+	if (!node->parent)
+		return;
+
+	/* acquire domain and verify all ancestors are active */
+	domain = fs_acquire_domain(node);
+	if (!domain)
+		return;
+
+	switch (node->type) {
+	case KDBUS_NODE_ENDPOINT:
+		if (WARN_ON(!node->parent || !node->parent->name))
+			goto exit;
+
+		name.name = node->parent->name;
+		name.len = strlen(node->parent->name);
+		parent_dentry = d_hash_and_lookup(domain->dentry, &name);
+		if (IS_ERR_OR_NULL(parent_dentry))
+			goto exit;
+
+		/* fallthrough */
+	case KDBUS_NODE_BUS:
+		if (WARN_ON(!node->name))
+			goto exit;
+
+		name.name = node->name;
+		name.len = strlen(node->name);
+		dentry = d_hash_and_lookup(parent_dentry ? : domain->dentry,
+					   &name);
+		if (!IS_ERR_OR_NULL(dentry)) {
+			d_invalidate(dentry);
+			dput(dentry);
+		}
+
+		dput(parent_dentry);
+		break;
+
+	default:
+		/* all other types are bound to their parent lifetime */
+		break;
+	}
+
+exit:
+	kdbus_node_release(&domain->node);
+}
diff -Nur linux-4.2/ipc/kdbus/fs.h linux-4.2-pck/ipc/kdbus/fs.h
--- linux-4.2/ipc/kdbus/fs.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/fs.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUSFS_H
+#define __KDBUSFS_H
+
+#include <linux/kernel.h>
+
+struct kdbus_node;
+
+int kdbus_fs_init(void);
+void kdbus_fs_exit(void);
+void kdbus_fs_flush(struct kdbus_node *node);
+
+#define kdbus_node_from_inode(_inode) \
+	((struct kdbus_node *)(_inode)->i_private)
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/handle.c linux-4.2-pck/ipc/kdbus/handle.c
--- linux-4.2/ipc/kdbus/handle.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/handle.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,691 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "fs.h"
+#include "handle.h"
+#include "item.h"
+#include "match.h"
+#include "message.h"
+#include "names.h"
+#include "domain.h"
+#include "policy.h"
+
+static int kdbus_args_verify(struct kdbus_args *args)
+{
+	struct kdbus_item *item;
+	size_t i;
+	int ret;
+
+	KDBUS_ITEMS_FOREACH(item, args->items, args->items_size) {
+		struct kdbus_arg *arg = NULL;
+
+		if (!KDBUS_ITEM_VALID(item, args->items, args->items_size))
+			return -EINVAL;
+
+		for (i = 0; i < args->argc; ++i)
+			if (args->argv[i].type == item->type)
+				break;
+		if (i >= args->argc)
+			return -EINVAL;
+
+		arg = &args->argv[i];
+
+		ret = kdbus_item_validate(item);
+		if (ret < 0)
+			return ret;
+
+		if (arg->item && !arg->multiple)
+			return -EINVAL;
+
+		arg->item = item;
+	}
+
+	if (!KDBUS_ITEMS_END(item, args->items, args->items_size))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int kdbus_args_negotiate(struct kdbus_args *args)
+{
+	struct kdbus_item __user *user;
+	struct kdbus_item *negotiation;
+	size_t i, j, num;
+
+	/*
+	 * If KDBUS_FLAG_NEGOTIATE is set, we overwrite the flags field with
+	 * the set of supported flags. Furthermore, if a KDBUS_ITEM_NEGOTIATE
+	 * item is passed, we iterate its payload (an array of u64, each set to
+	 * an item type) and clear all unsupported item types to 0.
+	 * The caller might do this recursively, if other flags or objects are
+	 * embedded in the payload itself.
+	 */
+
+	if (args->cmd->flags & KDBUS_FLAG_NEGOTIATE) {
+		if (put_user(args->allowed_flags & ~KDBUS_FLAG_NEGOTIATE,
+			     &args->user->flags))
+			return -EFAULT;
+	}
+
+	if (args->argc < 1 || args->argv[0].type != KDBUS_ITEM_NEGOTIATE ||
+	    !args->argv[0].item)
+		return 0;
+
+	negotiation = args->argv[0].item;
+	user = (struct kdbus_item __user *)
+		((u8 __user *)args->user +
+		 ((u8 *)negotiation - (u8 *)args->cmd));
+	num = KDBUS_ITEM_PAYLOAD_SIZE(negotiation) / sizeof(u64);
+
+	for (i = 0; i < num; ++i) {
+		for (j = 0; j < args->argc; ++j)
+			if (negotiation->data64[i] == args->argv[j].type)
+				break;
+
+		if (j < args->argc)
+			continue;
+
+		/* this item is not supported, clear it out */
+		negotiation->data64[i] = 0;
+		if (put_user(negotiation->data64[i], &user->data64[i]))
+			return -EFAULT;
+	}
+
+	return 0;
+}
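+
+/*
+ * Worked example of the negotiation above (the unknown type 0x12345678 is
+ * made up for illustration): a caller of KDBUS_CMD_MATCH_ADD sets
+ * flags = KDBUS_FLAG_NEGOTIATE and appends a KDBUS_ITEM_NEGOTIATE item
+ * whose payload is
+ *
+ *	u64 probe[] = { KDBUS_ITEM_NAME, KDBUS_ITEM_BLOOM_MASK, 0x12345678 };
+ *
+ * After the ioctl returns, the command's flags field holds the supported
+ * flags (with KDBUS_FLAG_NEGOTIATE cleared), the first two slots are left
+ * untouched because kdbus_cmd_match_add() accepts both item types (see
+ * match.c), and the unknown third slot has been cleared to 0.
+ */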
+
+/**
+ * __kdbus_args_parse() - parse payload of kdbus command
+ * @args:		object to parse data into
+ * @is_cmd:		whether this is a command or msg payload
+ * @argp:		user-space location of command payload to parse
+ * @type_size:		overall size of command payload to parse
+ * @items_offset:	offset of items array in command payload
+ * @out:		output variable to store pointer to copied payload
+ *
+ * This parses the ioctl payload at user-space location @argp into @args. @args
+ * must be pre-initialized by the caller to reflect the supported flags and
+ * items of this command. This parser will then copy the command payload into
+ * kernel-space, verify correctness and consistency and cache pointers to parsed
+ * items and other data in @args.
+ *
+ * If this function succeeds, you must call kdbus_args_clear() to release
+ * allocated resources before destroying @args.
+ *
+ * This can also be used to import kdbus_msg objects. In that case, @is_cmd must
+ * be set to 'false' and the 'return_flags' field will not be touched (as it
+ * doesn't exist on kdbus_msg).
+ *
+ * Return: On failure a negative error code is returned. Otherwise, 1 is
+ * returned if negotiation was requested, 0 if not.
+ */
+int __kdbus_args_parse(struct kdbus_args *args, bool is_cmd, void __user *argp,
+		       size_t type_size, size_t items_offset, void **out)
+{
+	u64 user_size;
+	int ret, i;
+
+	ret = kdbus_copy_from_user(&user_size, argp, sizeof(user_size));
+	if (ret < 0)
+		return ret;
+
+	if (user_size < type_size)
+		return -EINVAL;
+	if (user_size > KDBUS_CMD_MAX_SIZE)
+		return -EMSGSIZE;
+
+	if (user_size <= sizeof(args->cmd_buf)) {
+		if (copy_from_user(args->cmd_buf, argp, user_size))
+			return -EFAULT;
+		args->cmd = (void *)args->cmd_buf;
+	} else {
+		args->cmd = memdup_user(argp, user_size);
+		if (IS_ERR(args->cmd))
+			return PTR_ERR(args->cmd);
+	}
+
+	if (args->cmd->size != user_size) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if (is_cmd)
+		args->cmd->return_flags = 0;
+	args->user = argp;
+	args->items = (void *)((u8 *)args->cmd + items_offset);
+	args->items_size = args->cmd->size - items_offset;
+	args->is_cmd = is_cmd;
+
+	if (args->cmd->flags & ~args->allowed_flags) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = kdbus_args_verify(args);
+	if (ret < 0)
+		goto error;
+
+	ret = kdbus_args_negotiate(args);
+	if (ret < 0)
+		goto error;
+
+	/* mandatory items must be given (but not on negotiation) */
+	if (!(args->cmd->flags & KDBUS_FLAG_NEGOTIATE)) {
+		for (i = 0; i < args->argc; ++i)
+			if (args->argv[i].mandatory && !args->argv[i].item) {
+				ret = -EINVAL;
+				goto error;
+			}
+	}
+
+	*out = args->cmd;
+	return !!(args->cmd->flags & KDBUS_FLAG_NEGOTIATE);
+
+error:
+	return kdbus_args_clear(args, ret);
+}
+
+/**
+ * kdbus_args_clear() - release allocated command resources
+ * @args:	object to release resources of
+ * @ret:	return value of this command
+ *
+ * This frees all allocated resources on @args and copies the command result
+ * flags into user-space. @ret is usually returned unchanged by this function,
+ * so it can be used in the final 'return' statement of the command handler.
+ *
+ * Return: -EFAULT if return values cannot be copied into user-space, otherwise
+ *         @ret is returned unchanged.
+ */
+int kdbus_args_clear(struct kdbus_args *args, int ret)
+{
+	if (!args)
+		return ret;
+
+	if (!IS_ERR_OR_NULL(args->cmd)) {
+		if (args->is_cmd && put_user(args->cmd->return_flags,
+					     &args->user->return_flags))
+			ret = -EFAULT;
+		if (args->cmd != (void *)args->cmd_buf)
+			kfree(args->cmd);
+		args->cmd = NULL;
+	}
+
+	return ret;
+}
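+
+/*
+ * Illustrative sketch of how a command handler pairs the parser helpers
+ * (compare kdbus_cmd_match_remove() in match.c for a real instance; the
+ * do_work() helper below is hypothetical):
+ *
+ *	struct kdbus_cmd *cmd;
+ *	struct kdbus_arg argv[] = {
+ *		{ .type = KDBUS_ITEM_NEGOTIATE },
+ *	};
+ *	struct kdbus_args args = {
+ *		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+ *		.argv = argv,
+ *		.argc = ARRAY_SIZE(argv),
+ *	};
+ *	int ret;
+ *
+ *	ret = kdbus_args_parse(&args, argp, &cmd);
+ *	if (ret != 0)
+ *		return ret;
+ *
+ *	ret = do_work(cmd);
+ *	return kdbus_args_clear(&args, ret);
+ */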
+
+/**
+ * enum kdbus_handle_type - type a handle can be of
+ * @KDBUS_HANDLE_NONE:		no type set, yet
+ * @KDBUS_HANDLE_BUS_OWNER:	bus owner
+ * @KDBUS_HANDLE_EP_OWNER:	endpoint owner
+ * @KDBUS_HANDLE_CONNECTED:	endpoint connection after HELLO
+ */
+enum kdbus_handle_type {
+	KDBUS_HANDLE_NONE,
+	KDBUS_HANDLE_BUS_OWNER,
+	KDBUS_HANDLE_EP_OWNER,
+	KDBUS_HANDLE_CONNECTED,
+};
+
+/**
+ * struct kdbus_handle - handle to the kdbus system
+ * @lock:		handle lock
+ * @type:		type of this handle (KDBUS_HANDLE_*)
+ * @bus_owner:		bus this handle owns
+ * @ep_owner:		endpoint this handle owns
+ * @conn:		connection this handle owns
+ */
+struct kdbus_handle {
+	struct mutex lock;
+
+	enum kdbus_handle_type type;
+	union {
+		struct kdbus_bus *bus_owner;
+		struct kdbus_ep *ep_owner;
+		struct kdbus_conn *conn;
+	};
+};
+
+static int kdbus_handle_open(struct inode *inode, struct file *file)
+{
+	struct kdbus_handle *handle;
+	struct kdbus_node *node;
+	int ret;
+
+	node = kdbus_node_from_inode(inode);
+	if (!kdbus_node_acquire(node))
+		return -ESHUTDOWN;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	mutex_init(&handle->lock);
+	handle->type = KDBUS_HANDLE_NONE;
+
+	file->private_data = handle;
+	ret = 0;
+
+exit:
+	kdbus_node_release(node);
+	return ret;
+}
+
+static int kdbus_handle_release(struct inode *inode, struct file *file)
+{
+	struct kdbus_handle *handle = file->private_data;
+
+	switch (handle->type) {
+	case KDBUS_HANDLE_BUS_OWNER:
+		if (handle->bus_owner) {
+			kdbus_node_drain(&handle->bus_owner->node);
+			kdbus_bus_unref(handle->bus_owner);
+		}
+		break;
+	case KDBUS_HANDLE_EP_OWNER:
+		if (handle->ep_owner) {
+			kdbus_node_drain(&handle->ep_owner->node);
+			kdbus_ep_unref(handle->ep_owner);
+		}
+		break;
+	case KDBUS_HANDLE_CONNECTED:
+		kdbus_conn_disconnect(handle->conn, false);
+		kdbus_conn_unref(handle->conn);
+		break;
+	case KDBUS_HANDLE_NONE:
+		/* nothing to clean up */
+		break;
+	}
+
+	kfree(handle);
+
+	return 0;
+}
+
+static long kdbus_handle_ioctl_control(struct file *file, unsigned int cmd,
+				       void __user *argp)
+{
+	struct kdbus_handle *handle = file->private_data;
+	struct kdbus_node *node = file_inode(file)->i_private;
+	struct kdbus_domain *domain;
+	int ret = 0;
+
+	if (!kdbus_node_acquire(node))
+		return -ESHUTDOWN;
+
+	/*
+	 * The parent of control-nodes is always a domain; make sure to pin it
+	 * so the parent is actually valid.
+	 */
+	domain = kdbus_domain_from_node(node->parent);
+	if (!kdbus_node_acquire(&domain->node)) {
+		kdbus_node_release(node);
+		return -ESHUTDOWN;
+	}
+
+	switch (cmd) {
+	case KDBUS_CMD_BUS_MAKE: {
+		struct kdbus_bus *bus;
+
+		bus = kdbus_cmd_bus_make(domain, argp);
+		if (IS_ERR_OR_NULL(bus)) {
+			ret = PTR_ERR_OR_ZERO(bus);
+			break;
+		}
+
+		handle->bus_owner = bus;
+		ret = KDBUS_HANDLE_BUS_OWNER;
+		break;
+	}
+
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+	kdbus_node_release(&domain->node);
+	kdbus_node_release(node);
+	return ret;
+}
+
+static long kdbus_handle_ioctl_ep(struct file *file, unsigned int cmd,
+				  void __user *buf)
+{
+	struct kdbus_handle *handle = file->private_data;
+	struct kdbus_node *node = file_inode(file)->i_private;
+	struct kdbus_ep *ep, *file_ep = kdbus_ep_from_node(node);
+	struct kdbus_bus *bus = file_ep->bus;
+	struct kdbus_conn *conn;
+	int ret = 0;
+
+	if (!kdbus_node_acquire(node))
+		return -ESHUTDOWN;
+
+	switch (cmd) {
+	case KDBUS_CMD_ENDPOINT_MAKE: {
+		/* creating custom endpoints is a privileged operation */
+		if (!kdbus_ep_is_owner(file_ep, file)) {
+			ret = -EPERM;
+			break;
+		}
+
+		ep = kdbus_cmd_ep_make(bus, buf);
+		if (IS_ERR_OR_NULL(ep)) {
+			ret = PTR_ERR_OR_ZERO(ep);
+			break;
+		}
+
+		handle->ep_owner = ep;
+		ret = KDBUS_HANDLE_EP_OWNER;
+		break;
+	}
+
+	case KDBUS_CMD_HELLO:
+		conn = kdbus_cmd_hello(file_ep, file, buf);
+		if (IS_ERR_OR_NULL(conn)) {
+			ret = PTR_ERR_OR_ZERO(conn);
+			break;
+		}
+
+		handle->conn = conn;
+		ret = KDBUS_HANDLE_CONNECTED;
+		break;
+
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+	kdbus_node_release(node);
+	return ret;
+}
+
+static long kdbus_handle_ioctl_ep_owner(struct file *file, unsigned int command,
+					void __user *buf)
+{
+	struct kdbus_handle *handle = file->private_data;
+	struct kdbus_ep *ep = handle->ep_owner;
+	int ret;
+
+	if (!kdbus_node_acquire(&ep->node))
+		return -ESHUTDOWN;
+
+	switch (command) {
+	case KDBUS_CMD_ENDPOINT_UPDATE:
+		ret = kdbus_cmd_ep_update(ep, buf);
+		break;
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+	kdbus_node_release(&ep->node);
+	return ret;
+}
+
+static long kdbus_handle_ioctl_connected(struct file *file,
+					 unsigned int command, void __user *buf)
+{
+	struct kdbus_handle *handle = file->private_data;
+	struct kdbus_conn *conn = handle->conn;
+	struct kdbus_conn *release_conn = NULL;
+	int ret;
+
+	release_conn = conn;
+	ret = kdbus_conn_acquire(release_conn);
+	if (ret < 0)
+		return ret;
+
+	switch (command) {
+	case KDBUS_CMD_BYEBYE:
+		/*
+		 * BYEBYE is special; we must not acquire a connection when
+		 * calling into kdbus_conn_disconnect() or we will deadlock,
+		 * because kdbus_conn_disconnect() will wait for all acquired
+		 * references to be dropped.
+		 */
+		kdbus_conn_release(release_conn);
+		release_conn = NULL;
+		ret = kdbus_cmd_byebye_unlocked(conn, buf);
+		break;
+	case KDBUS_CMD_NAME_ACQUIRE:
+		ret = kdbus_cmd_name_acquire(conn, buf);
+		break;
+	case KDBUS_CMD_NAME_RELEASE:
+		ret = kdbus_cmd_name_release(conn, buf);
+		break;
+	case KDBUS_CMD_LIST:
+		ret = kdbus_cmd_list(conn, buf);
+		break;
+	case KDBUS_CMD_CONN_INFO:
+		ret = kdbus_cmd_conn_info(conn, buf);
+		break;
+	case KDBUS_CMD_BUS_CREATOR_INFO:
+		ret = kdbus_cmd_bus_creator_info(conn, buf);
+		break;
+	case KDBUS_CMD_UPDATE:
+		ret = kdbus_cmd_update(conn, buf);
+		break;
+	case KDBUS_CMD_MATCH_ADD:
+		ret = kdbus_cmd_match_add(conn, buf);
+		break;
+	case KDBUS_CMD_MATCH_REMOVE:
+		ret = kdbus_cmd_match_remove(conn, buf);
+		break;
+	case KDBUS_CMD_SEND:
+		ret = kdbus_cmd_send(conn, file, buf);
+		break;
+	case KDBUS_CMD_RECV:
+		ret = kdbus_cmd_recv(conn, buf);
+		break;
+	case KDBUS_CMD_FREE:
+		ret = kdbus_cmd_free(conn, buf);
+		break;
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+	kdbus_conn_release(release_conn);
+	return ret;
+}
+
+static long kdbus_handle_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg)
+{
+	struct kdbus_handle *handle = file->private_data;
+	struct kdbus_node *node = kdbus_node_from_inode(file_inode(file));
+	void __user *argp = (void __user *)arg;
+	long ret = -EBADFD;
+
+	switch (cmd) {
+	case KDBUS_CMD_BUS_MAKE:
+	case KDBUS_CMD_ENDPOINT_MAKE:
+	case KDBUS_CMD_HELLO:
+		mutex_lock(&handle->lock);
+		if (handle->type == KDBUS_HANDLE_NONE) {
+			if (node->type == KDBUS_NODE_CONTROL)
+				ret = kdbus_handle_ioctl_control(file, cmd,
+								 argp);
+			else if (node->type == KDBUS_NODE_ENDPOINT)
+				ret = kdbus_handle_ioctl_ep(file, cmd, argp);
+
+			if (ret > 0) {
+				/*
+				 * The data given via open() is not sufficient
+				 * to set up a kdbus handle. Hence, we require
+				 * the user to perform a setup ioctl. This setup
+				 * can only be performed once and defines the
+				 * type of the handle. The different setup
+				 * ioctls are locked against each other so they
+				 * cannot race. Once the handle type is set,
+				 * the type-dependent ioctls are enabled. To
+				 * improve performance, we don't lock those via
+				 * handle->lock. Instead, we issue a
+				 * write-barrier before performing the
+				 * type-change, which pairs with smp_rmb() in
+				 * all handlers that access the type field. This
+				 * guarantees the handle is fully set up if
+				 * handle->type is set. If handle->type is
+				 * unset, you must not make any assumptions
+				 * without taking handle->lock.
+				 * Note that handle->type is only set once. It
+				 * will never change afterwards.
+				 */
+				smp_wmb();
+				handle->type = ret;
+			}
+		}
+		mutex_unlock(&handle->lock);
+		break;
+
+	case KDBUS_CMD_ENDPOINT_UPDATE:
+	case KDBUS_CMD_BYEBYE:
+	case KDBUS_CMD_NAME_ACQUIRE:
+	case KDBUS_CMD_NAME_RELEASE:
+	case KDBUS_CMD_LIST:
+	case KDBUS_CMD_CONN_INFO:
+	case KDBUS_CMD_BUS_CREATOR_INFO:
+	case KDBUS_CMD_UPDATE:
+	case KDBUS_CMD_MATCH_ADD:
+	case KDBUS_CMD_MATCH_REMOVE:
+	case KDBUS_CMD_SEND:
+	case KDBUS_CMD_RECV:
+	case KDBUS_CMD_FREE: {
+		enum kdbus_handle_type type;
+
+		/*
+		 * This read-barrier pairs with smp_wmb() of the handle setup.
+		 * It guarantees the handle is fully written in case the
+		 * type has been set. It allows us to access the handle without
+		 * taking handle->lock, given the guarantee that the type is
+		 * only ever set once, and stays constant afterwards.
+		 * Furthermore, the handle object itself is not modified in any
+		 * way after the type is set. That is, the type-field is the
+		 * last field that is written on any handle. If it has not been
+		 * set, we must not access the handle here.
+		 */
+		type = handle->type;
+		smp_rmb();
+
+		if (type == KDBUS_HANDLE_EP_OWNER)
+			ret = kdbus_handle_ioctl_ep_owner(file, cmd, argp);
+		else if (type == KDBUS_HANDLE_CONNECTED)
+			ret = kdbus_handle_ioctl_connected(file, cmd, argp);
+
+		break;
+	}
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+static unsigned int kdbus_handle_poll(struct file *file,
+				      struct poll_table_struct *wait)
+{
+	struct kdbus_handle *handle = file->private_data;
+	enum kdbus_handle_type type;
+	unsigned int mask = POLLOUT | POLLWRNORM;
+
+	/*
+	 * This pairs with smp_wmb() during handle setup. It guarantees that
+	 * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+	 * _iff_ the type is set, the handle object is constant and never
+	 * changed again. If it's not set, we must not access the handle but
+	 * bail out. We also must assume no setup has taken place yet.
+	 */
+	type = handle->type;
+	smp_rmb();
+
+	/* Only a connected endpoint can read/write data */
+	if (type != KDBUS_HANDLE_CONNECTED)
+		return POLLERR | POLLHUP;
+
+	poll_wait(file, &handle->conn->wait, wait);
+
+	/*
+	 * Verify the connection hasn't been deactivated _after_ adding the
+	 * wait-queue. This guarantees that if the connection is deactivated
+	 * after we checked it, the waitqueue is signaled and we're called
+	 * again.
+	 */
+	if (!kdbus_conn_active(handle->conn))
+		return POLLERR | POLLHUP;
+
+	if (!list_empty(&handle->conn->queue.msg_list) ||
+	    atomic_read(&handle->conn->lost_count) > 0)
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+static int kdbus_handle_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kdbus_handle *handle = file->private_data;
+	enum kdbus_handle_type type;
+	int ret = -EBADFD;
+
+	/*
+	 * This pairs with smp_wmb() during handle setup. It guarantees that
+	 * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+	 * _iff_ the type is set, the handle object is constant and never
+	 * changed again. If it's not set, we must not access the handle but
+	 * bail out. We also must assume no setup has taken place yet.
+	 */
+	type = handle->type;
+	smp_rmb();
+
+	/* Only connected handles have a pool we can map */
+	if (type == KDBUS_HANDLE_CONNECTED)
+		ret = kdbus_pool_mmap(handle->conn->pool, vma);
+
+	return ret;
+}
+
+const struct file_operations kdbus_handle_ops = {
+	.owner =		THIS_MODULE,
+	.open =			kdbus_handle_open,
+	.release =		kdbus_handle_release,
+	.poll =			kdbus_handle_poll,
+	.llseek =		noop_llseek,
+	.unlocked_ioctl =	kdbus_handle_ioctl,
+	.mmap =			kdbus_handle_mmap,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =		kdbus_handle_ioctl,
+#endif
+};
diff -Nur linux-4.2/ipc/kdbus/handle.h linux-4.2-pck/ipc/kdbus/handle.h
--- linux-4.2/ipc/kdbus/handle.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/handle.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_HANDLE_H
+#define __KDBUS_HANDLE_H
+
+#include <linux/fs.h>
+#include <uapi/linux/kdbus.h>
+
+extern const struct file_operations kdbus_handle_ops;
+
+/**
+ * kdbus_arg - information and state of a single ioctl command item
+ * @type:		item type
+ * @item:		set by the parser to the first found item of this type
+ * @multiple:		whether multiple items of this type are allowed
+ * @mandatory:		whether at least one item of this type is required
+ *
+ * This structure describes a single item in an ioctl command payload. The
+ * caller has to pre-fill the type and flags; the parser will then use this
+ * information to verify the ioctl payload. @item is set by the parser to point
+ * to the first occurrence of the item.
+ */
+struct kdbus_arg {
+	u64 type;
+	struct kdbus_item *item;
+	bool multiple : 1;
+	bool mandatory : 1;
+};
+
+/**
+ * kdbus_args - information and state of ioctl command parser
+ * @allowed_flags:	set of flags this command supports
+ * @argc:		number of items in @argv
+ * @argv:		array of items this command supports
+ * @user:		set by parser to user-space location of current command
+ * @cmd:		set by parser to kernel copy of command payload
+ * @cmd_buf:		inline buf to avoid kmalloc() on small cmds
+ * @items:		points to item array in @cmd
+ * @items_size:		size of @items in bytes
+ * @is_cmd:		whether this is a command-payload or msg-payload
+ *
+ * This structure is used to parse ioctl command payloads on each invocation.
+ * The ioctl handler has to pre-fill the flags and allowed items before passing
+ * the object to kdbus_args_parse(). The parser will copy the command payload
+ * into kernel-space and verify the correctness of the data.
+ *
+ * We use a 256-byte buffer for small command payloads, allocated on the
+ * stack on syscall entry.
+ */
+struct kdbus_args {
+	u64 allowed_flags;
+	size_t argc;
+	struct kdbus_arg *argv;
+
+	struct kdbus_cmd __user *user;
+	struct kdbus_cmd *cmd;
+	u8 cmd_buf[256];
+
+	struct kdbus_item *items;
+	size_t items_size;
+	bool is_cmd : 1;
+};
+
+int __kdbus_args_parse(struct kdbus_args *args, bool is_cmd, void __user *argp,
+		       size_t type_size, size_t items_offset, void **out);
+int kdbus_args_clear(struct kdbus_args *args, int ret);
+
+#define kdbus_args_parse(_args, _argp, _v)                              \
+	({                                                              \
+		BUILD_BUG_ON(offsetof(typeof(**(_v)), size) !=          \
+			     offsetof(struct kdbus_cmd, size));         \
+		BUILD_BUG_ON(offsetof(typeof(**(_v)), flags) !=         \
+			     offsetof(struct kdbus_cmd, flags));        \
+		BUILD_BUG_ON(offsetof(typeof(**(_v)), return_flags) !=  \
+			     offsetof(struct kdbus_cmd, return_flags)); \
+		__kdbus_args_parse((_args), 1, (_argp), sizeof(**(_v)), \
+				   offsetof(typeof(**(_v)), items),     \
+				   (void **)(_v));                      \
+	})
+
+#define kdbus_args_parse_msg(_args, _argp, _v)                          \
+	({                                                              \
+		BUILD_BUG_ON(offsetof(typeof(**(_v)), size) !=          \
+			     offsetof(struct kdbus_cmd, size));         \
+		BUILD_BUG_ON(offsetof(typeof(**(_v)), flags) !=         \
+			     offsetof(struct kdbus_cmd, flags));        \
+		__kdbus_args_parse((_args), 0, (_argp), sizeof(**(_v)), \
+				   offsetof(typeof(**(_v)), items),     \
+				   (void **)(_v));                      \
+	})
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/item.c linux-4.2-pck/ipc/kdbus/item.c
--- linux-4.2/ipc/kdbus/item.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/item.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+
+#include "item.h"
+#include "limits.h"
+#include "util.h"
+
+/*
+ * This verifies that the string at position @str with size @size is properly
+ * zero-terminated and contains no 0-byte except at the end.
+ */
+static bool kdbus_str_valid(const char *str, size_t size)
+{
+	return size > 0 && memchr(str, '\0', size) == str + size - 1;
+}
+
+/**
+ * kdbus_item_validate_name() - validate an item containing a name
+ * @item:		Item to validate
+ *
+ * Return: zero on success or a negative error code on failure
+ */
+int kdbus_item_validate_name(const struct kdbus_item *item)
+{
+	const char *name = item->str;
+	unsigned int i;
+	size_t len;
+
+	if (item->size < KDBUS_ITEM_HEADER_SIZE + 2)
+		return -EINVAL;
+
+	if (item->size > KDBUS_ITEM_HEADER_SIZE +
+			 KDBUS_SYSNAME_MAX_LEN + 1)
+		return -ENAMETOOLONG;
+
+	if (!kdbus_str_valid(name, KDBUS_ITEM_PAYLOAD_SIZE(item)))
+		return -EINVAL;
+
+	len = strlen(name);
+	if (len == 0)
+		return -EINVAL;
+
+	for (i = 0; i < len; i++) {
+		if (isalpha(name[i]))
+			continue;
+		if (isdigit(name[i]))
+			continue;
+		if (name[i] == '_')
+			continue;
+		if (i > 0 && i + 1 < len && (name[i] == '-' || name[i] == '.'))
+			continue;
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * kdbus_item_validate() - validate a single item
+ * @item:	item to validate
+ *
+ * Return: 0 if item is valid, negative error code if not.
+ */
+int kdbus_item_validate(const struct kdbus_item *item)
+{
+	size_t payload_size = KDBUS_ITEM_PAYLOAD_SIZE(item);
+	size_t l;
+	int ret;
+
+	BUILD_BUG_ON(KDBUS_ITEM_HEADER_SIZE !=
+		     sizeof(struct kdbus_item_header));
+
+	if (item->size < KDBUS_ITEM_HEADER_SIZE)
+		return -EINVAL;
+
+	switch (item->type) {
+	case KDBUS_ITEM_NEGOTIATE:
+		if (payload_size % sizeof(u64) != 0)
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_PAYLOAD_VEC:
+	case KDBUS_ITEM_PAYLOAD_OFF:
+		if (payload_size != sizeof(struct kdbus_vec))
+			return -EINVAL;
+		if (item->vec.size == 0 || item->vec.size > SIZE_MAX)
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_PAYLOAD_MEMFD:
+		if (payload_size != sizeof(struct kdbus_memfd))
+			return -EINVAL;
+		if (item->memfd.size == 0 || item->memfd.size > SIZE_MAX)
+			return -EINVAL;
+		if (item->memfd.fd < 0)
+			return -EBADF;
+		break;
+
+	case KDBUS_ITEM_FDS:
+		if (payload_size % sizeof(int) != 0)
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_CANCEL_FD:
+		if (payload_size != sizeof(int))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_BLOOM_PARAMETER:
+		if (payload_size != sizeof(struct kdbus_bloom_parameter))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_BLOOM_FILTER:
+		/* followed by the bloom-mask, depends on the bloom-size */
+		if (payload_size < sizeof(struct kdbus_bloom_filter))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_BLOOM_MASK:
+		/* size depends on bloom-size of bus */
+		break;
+
+	case KDBUS_ITEM_CONN_DESCRIPTION:
+	case KDBUS_ITEM_MAKE_NAME:
+		ret = kdbus_item_validate_name(item);
+		if (ret < 0)
+			return ret;
+		break;
+
+	case KDBUS_ITEM_ATTACH_FLAGS_SEND:
+	case KDBUS_ITEM_ATTACH_FLAGS_RECV:
+	case KDBUS_ITEM_ID:
+	case KDBUS_ITEM_DST_ID:
+		if (payload_size != sizeof(u64))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_TIMESTAMP:
+		if (payload_size != sizeof(struct kdbus_timestamp))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_CREDS:
+		if (payload_size != sizeof(struct kdbus_creds))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_AUXGROUPS:
+		if (payload_size % sizeof(u32) != 0)
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_NAME:
+	case KDBUS_ITEM_DST_NAME:
+	case KDBUS_ITEM_PID_COMM:
+	case KDBUS_ITEM_TID_COMM:
+	case KDBUS_ITEM_EXE:
+	case KDBUS_ITEM_CMDLINE:
+	case KDBUS_ITEM_CGROUP:
+	case KDBUS_ITEM_SECLABEL:
+		if (!kdbus_str_valid(item->str, payload_size))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_CAPS:
+		if (payload_size < sizeof(u32))
+			return -EINVAL;
+		if (payload_size < sizeof(u32) +
+		    4 * CAP_TO_INDEX(item->caps.last_cap) * sizeof(u32))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_AUDIT:
+		if (payload_size != sizeof(struct kdbus_audit))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_POLICY_ACCESS:
+		if (payload_size != sizeof(struct kdbus_policy_access))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_NAME_ADD:
+	case KDBUS_ITEM_NAME_REMOVE:
+	case KDBUS_ITEM_NAME_CHANGE:
+		if (payload_size < sizeof(struct kdbus_notify_name_change))
+			return -EINVAL;
+		l = payload_size - offsetof(struct kdbus_notify_name_change,
+					    name);
+		if (l > 0 && !kdbus_str_valid(item->name_change.name, l))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_ID_ADD:
+	case KDBUS_ITEM_ID_REMOVE:
+		if (payload_size != sizeof(struct kdbus_notify_id_change))
+			return -EINVAL;
+		break;
+
+	case KDBUS_ITEM_REPLY_TIMEOUT:
+	case KDBUS_ITEM_REPLY_DEAD:
+		if (payload_size != 0)
+			return -EINVAL;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * kdbus_items_validate() - validate items passed by user-space
+ * @items:		items to validate
+ * @items_size:		number of items
+ *
+ * This verifies that the passed items pointer is consistent and valid.
+ * Furthermore, each item is checked for:
+ *  - valid "size" value
+ *  - payload is of expected type
+ *  - payload is fully included in the item
+ *  - string payloads are zero-terminated
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_items_validate(const struct kdbus_item *items, size_t items_size)
+{
+	const struct kdbus_item *item;
+	int ret;
+
+	KDBUS_ITEMS_FOREACH(item, items, items_size) {
+		if (!KDBUS_ITEM_VALID(item, items, items_size))
+			return -EINVAL;
+
+		ret = kdbus_item_validate(item);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (!KDBUS_ITEMS_END(item, items, items_size))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * kdbus_item_set() - Set item content
+ * @item:	The item to modify
+ * @type:	The item type to set (KDBUS_ITEM_*)
+ * @data:	Data to copy to item->data, may be %NULL
+ * @len:	Number of bytes in @data
+ *
+ * This sets type, size and data fields of an item. If @data is NULL, the data
+ * memory is cleared.
+ *
+ * Note that you must align your @data memory to 8 bytes. Trailing padding (in
+ * case @len is not 8-byte aligned) is cleared by this call.
+ *
+ * Return: Pointer to the following item.
+ */
+struct kdbus_item *kdbus_item_set(struct kdbus_item *item, u64 type,
+				  const void *data, size_t len)
+{
+	item->type = type;
+	item->size = KDBUS_ITEM_HEADER_SIZE + len;
+
+	if (data) {
+		memcpy(item->data, data, len);
+		memset(item->data + len, 0, KDBUS_ALIGN8(len) - len);
+	} else {
+		memset(item->data, 0, KDBUS_ALIGN8(len));
+	}
+
+	return KDBUS_ITEM_NEXT(item);
+}
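+
+/*
+ * Illustrative sketch (the buffer and values are hypothetical): since
+ * kdbus_item_set() returns a pointer to the next item slot, callers can
+ * chain invocations to serialize several items into one 8-byte aligned
+ * buffer and compute the consumed size from the returned pointer:
+ *
+ *	u64 src = 1, dst = 2;
+ *	struct kdbus_item *item = buf;
+ *
+ *	item = kdbus_item_set(item, KDBUS_ITEM_ID, &src, sizeof(src));
+ *	item = kdbus_item_set(item, KDBUS_ITEM_DST_ID, &dst, sizeof(dst));
+ *	used = (u8 *)item - (u8 *)buf;
+ */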
diff -Nur linux-4.2/ipc/kdbus/item.h linux-4.2-pck/ipc/kdbus/item.h
--- linux-4.2/ipc/kdbus/item.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/item.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_ITEM_H
+#define __KDBUS_ITEM_H
+
+#include <linux/kernel.h>
+#include <uapi/linux/kdbus.h>
+
+#include "util.h"
+
+/* generic access and iterators over a stream of items */
+#define KDBUS_ITEM_NEXT(_i) (typeof(_i))((u8 *)(_i) + KDBUS_ALIGN8((_i)->size))
+#define KDBUS_ITEMS_SIZE(_h, _is) ((_h)->size - offsetof(typeof(*(_h)), _is))
+#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
+#define KDBUS_ITEM_SIZE(_s) KDBUS_ALIGN8(KDBUS_ITEM_HEADER_SIZE + (_s))
+#define KDBUS_ITEM_PAYLOAD_SIZE(_i) ((_i)->size - KDBUS_ITEM_HEADER_SIZE)
+
+#define KDBUS_ITEMS_FOREACH(_i, _is, _s)				\
+	for ((_i) = (_is);						\
+	     ((u8 *)(_i) < (u8 *)(_is) + (_s)) &&			\
+	       ((u8 *)(_i) >= (u8 *)(_is));				\
+	     (_i) = KDBUS_ITEM_NEXT(_i))
+
+#define KDBUS_ITEM_VALID(_i, _is, _s)					\
+	((_i)->size >= KDBUS_ITEM_HEADER_SIZE &&			\
+	 (u8 *)(_i) + (_i)->size > (u8 *)(_i) &&			\
+	 (u8 *)(_i) + (_i)->size <= (u8 *)(_is) + (_s) &&		\
+	 (u8 *)(_i) >= (u8 *)(_is))
+
+#define KDBUS_ITEMS_END(_i, _is, _s)					\
+	((u8 *)(_i) == ((u8 *)(_is) + KDBUS_ALIGN8(_s)))
+
+/**
+ * struct kdbus_item_header - Describes the fixed part of an item
+ * @size:	The total size of the item
+ * @type:	The item type, one of KDBUS_ITEM_*
+ */
+struct kdbus_item_header {
+	u64 size;
+	u64 type;
+};
+
+int kdbus_item_validate_name(const struct kdbus_item *item);
+int kdbus_item_validate(const struct kdbus_item *item);
+int kdbus_items_validate(const struct kdbus_item *items, size_t items_size);
+struct kdbus_item *kdbus_item_set(struct kdbus_item *item, u64 type,
+				  const void *data, size_t len);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/limits.h linux-4.2-pck/ipc/kdbus/limits.h
--- linux-4.2/ipc/kdbus/limits.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/limits.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_DEFAULTS_H
+#define __KDBUS_DEFAULTS_H
+
+#include <linux/kernel.h>
+
+/* maximum size of message header and items */
+#define KDBUS_MSG_MAX_SIZE		SZ_8K
+
+/* maximum number of memfd items per message */
+#define KDBUS_MSG_MAX_MEMFD_ITEMS	16
+
+/* max size of ioctl command data */
+#define KDBUS_CMD_MAX_SIZE		SZ_32K
+
+/* maximum number of inflight fds in a target queue per user */
+#define KDBUS_CONN_MAX_FDS_PER_USER	16
+
+/* maximum message payload size */
+#define KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE		SZ_2M
+
+/* maximum size of bloom bit field in bytes */
+#define KDBUS_BUS_BLOOM_MAX_SIZE		SZ_4K
+
+/* maximum length of well-known bus name */
+#define KDBUS_NAME_MAX_LEN			255
+
+/* maximum length of bus, domain, ep name */
+#define KDBUS_SYSNAME_MAX_LEN			63
+
+/* maximum number of matches per connection */
+#define KDBUS_MATCH_MAX				4096
+
+/* maximum number of queued messages from the same individual user */
+#define KDBUS_CONN_MAX_MSGS			256
+
+/* maximum number of well-known names per connection */
+#define KDBUS_CONN_MAX_NAMES			256
+
+/* maximum number of queued requests waiting for a reply */
+#define KDBUS_CONN_MAX_REQUESTS_PENDING		128
+
+/* maximum number of connections per user in one domain */
+#define KDBUS_USER_MAX_CONN			1024
+
+/* maximum number of buses per user in one domain */
+#define KDBUS_USER_MAX_BUSES			16
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/main.c linux-4.2-pck/ipc/kdbus/main.c
--- linux-4.2/ipc/kdbus/main.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/main.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "util.h"
+#include "fs.h"
+#include "handle.h"
+#include "metadata.h"
+#include "node.h"
+
+/*
+ * This is a simplified outline of the internal kdbus object relations, for
+ * those interested in the inner life of the driver implementation.
+ *
+ * From a mount point's (domain's) perspective:
+ *
+ * struct kdbus_domain
+ *   |» struct kdbus_user *user (many, owned)
+ *   '» struct kdbus_node node (embedded)
+ *       |» struct kdbus_node children (many, referenced)
+ *       |» struct kdbus_node *parent (pinned)
+ *       '» struct kdbus_bus (many, pinned)
+ *           |» struct kdbus_node node (embedded)
+ *           '» struct kdbus_ep (many, pinned)
+ *               |» struct kdbus_node node (embedded)
+ *               |» struct kdbus_bus *bus (pinned)
+ *               |» struct kdbus_conn conn_list (many, pinned)
+ *               |   |» struct kdbus_ep *ep (pinned)
+ *               |   |» struct kdbus_name_entry *activator_of (owned)
+ *               |   |» struct kdbus_match_db *match_db (owned)
+ *               |   |» struct kdbus_meta *meta (owned)
+ *               |   |» struct kdbus_match_db *match_db (owned)
+ *               |   |    '» struct kdbus_match_entry (many, owned)
+ *               |   |
+ *               |   |» struct kdbus_pool *pool (owned)
+ *               |   |    '» struct kdbus_pool_slice *slices (many, owned)
+ *               |   |       '» struct kdbus_pool *pool (pinned)
+ *               |   |
+ *               |   |» struct kdbus_user *user (pinned)
+ *               |   `» struct kdbus_queue_entry entries (many, embedded)
+ *               |        |» struct kdbus_pool_slice *slice (pinned)
+ *               |        |» struct kdbus_conn_reply *reply (owned)
+ *               |        '» struct kdbus_user *user (pinned)
+ *               |
+ *               '» struct kdbus_user *user (pinned)
+ *                   '» struct kdbus_policy_db policy_db (embedded)
+ *                        |» struct kdbus_policy_db_entry (many, owned)
+ *                        |   |» struct kdbus_conn (pinned)
+ *                        |   '» struct kdbus_ep (pinned)
+ *                        |
+ *                        '» struct kdbus_policy_db_cache_entry (many, owned)
+ *                            '» struct kdbus_conn (pinned)
+ *
+ * For the lifetime of a file descriptor derived from calling open() on a file
+ * inside the mount point:
+ *
+ * struct kdbus_handle
+ *  |» struct kdbus_meta *meta (owned)
+ *  |» struct kdbus_ep *ep (pinned)
+ *  |» struct kdbus_conn *conn (owned)
+ *  '» struct kdbus_ep *ep (owned)
+ */
+
+static int __init kdbus_init(void)
+{
+	int ret;
+
+	ret = sysfs_create_mount_point(fs_kobj, KBUILD_MODNAME);
+	if (ret)
+		return ret;
+
+	ret = kdbus_fs_init();
+	if (ret < 0) {
+		pr_err("cannot register filesystem: %d\n", ret);
+		goto exit_dir;
+	}
+
+	pr_info("initialized\n");
+	return 0;
+
+exit_dir:
+	sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME);
+	return ret;
+}
+
+static void __exit kdbus_exit(void)
+{
+	kdbus_fs_exit();
+	sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME);
+	ida_destroy(&kdbus_node_ida);
+}
+
+module_init(kdbus_init);
+module_exit(kdbus_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("D-Bus, powerful, easy to use interprocess communication");
+MODULE_ALIAS_FS(KBUILD_MODNAME "fs");
diff -Nur linux-4.2/ipc/kdbus/match.c linux-4.2-pck/ipc/kdbus/match.c
--- linux-4.2/ipc/kdbus/match.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/match.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "item.h"
+#include "match.h"
+#include "message.h"
+#include "names.h"
+
+/**
+ * struct kdbus_match_db - message filters
+ * @entries_list:	List of matches
+ * @mdb_rwlock:		Match data lock
+ * @entries_count:	Number of entries in database
+ */
+struct kdbus_match_db {
+	struct list_head entries_list;
+	struct rw_semaphore mdb_rwlock;
+	unsigned int entries_count;
+};
+
+/**
+ * struct kdbus_match_entry - a match database entry
+ * @cookie:		User-supplied cookie to lookup the entry
+ * @list_entry:		The list entry element for the db list
+ * @rules_list:		The list head for tracking rules of this entry
+ */
+struct kdbus_match_entry {
+	u64 cookie;
+	struct list_head list_entry;
+	struct list_head rules_list;
+};
+
+/**
+ * struct kdbus_bloom_mask - mask to match against filter
+ * @generations:	Number of generations carried
+ * @data:		Array of bloom bit fields
+ */
+struct kdbus_bloom_mask {
+	u64 generations;
+	u64 *data;
+};
+
+/**
+ * struct kdbus_match_rule - a rule appended to a match entry
+ * @type:		An item type to match against
+ * @bloom_mask:		Bloom mask to match a message's filter against, used
+ *			with KDBUS_ITEM_BLOOM_MASK
+ * @name:		Name to match against, used with KDBUS_ITEM_NAME,
+ *			KDBUS_ITEM_NAME_{ADD,REMOVE,CHANGE}
+ * @old_id:		ID to match against, used with
+ *			KDBUS_ITEM_NAME_{ADD,REMOVE,CHANGE},
+ *			KDBUS_ITEM_ID_REMOVE
+ * @new_id:		ID to match against, used with
+ *			KDBUS_ITEM_NAME_{ADD,REMOVE,CHANGE},
+ *			KDBUS_ITEM_ID_ADD
+ * @src_id:		ID to match against, used with KDBUS_ITEM_ID
+ * @dst_id:		Message destination ID, used with KDBUS_ITEM_DST_ID
+ * @rules_entry:	Entry in the entry's rules list
+ */
+struct kdbus_match_rule {
+	u64 type;
+	union {
+		struct kdbus_bloom_mask bloom_mask;
+		struct {
+			char *name;
+			u64 old_id;
+			u64 new_id;
+		};
+		u64 src_id;
+		u64 dst_id;
+	};
+	struct list_head rules_entry;
+};
+
+static void kdbus_match_rule_free(struct kdbus_match_rule *rule)
+{
+	if (!rule)
+		return;
+
+	switch (rule->type) {
+	case KDBUS_ITEM_BLOOM_MASK:
+		kfree(rule->bloom_mask.data);
+		break;
+
+	case KDBUS_ITEM_NAME:
+	case KDBUS_ITEM_NAME_ADD:
+	case KDBUS_ITEM_NAME_REMOVE:
+	case KDBUS_ITEM_NAME_CHANGE:
+		kfree(rule->name);
+		break;
+
+	case KDBUS_ITEM_ID:
+	case KDBUS_ITEM_DST_ID:
+	case KDBUS_ITEM_ID_ADD:
+	case KDBUS_ITEM_ID_REMOVE:
+		break;
+
+	default:
+		BUG();
+	}
+
+	list_del(&rule->rules_entry);
+	kfree(rule);
+}
+
+static void kdbus_match_entry_free(struct kdbus_match_entry *entry)
+{
+	struct kdbus_match_rule *r, *tmp;
+
+	if (!entry)
+		return;
+
+	list_for_each_entry_safe(r, tmp, &entry->rules_list, rules_entry)
+		kdbus_match_rule_free(r);
+
+	list_del(&entry->list_entry);
+	kfree(entry);
+}
+
+/**
+ * kdbus_match_db_free() - free match db resources
+ * @mdb:		The match database
+ */
+void kdbus_match_db_free(struct kdbus_match_db *mdb)
+{
+	struct kdbus_match_entry *entry, *tmp;
+
+	if (!mdb)
+		return;
+
+	list_for_each_entry_safe(entry, tmp, &mdb->entries_list, list_entry)
+		kdbus_match_entry_free(entry);
+
+	kfree(mdb);
+}
+
+/**
+ * kdbus_match_db_new() - create a new match database
+ *
+ * Return: a new kdbus_match_db on success, ERR_PTR on failure.
+ */
+struct kdbus_match_db *kdbus_match_db_new(void)
+{
+	struct kdbus_match_db *d;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	init_rwsem(&d->mdb_rwlock);
+	INIT_LIST_HEAD(&d->entries_list);
+
+	return d;
+}
+
+static bool kdbus_match_bloom(const struct kdbus_bloom_filter *filter,
+			      const struct kdbus_bloom_mask *mask,
+			      const struct kdbus_conn *conn)
+{
+	size_t n = conn->ep->bus->bloom.size / sizeof(u64);
+	const u64 *m;
+	size_t i;
+
+	/*
+	 * The message's filter carries a generation identifier; the
+	 * match's mask possibly carries an array of multiple generations
+	 * of the mask. Select the mask with the closest match of the
+	 * filter's generation.
+	 */
+	m = mask->data + (min(filter->generation, mask->generations - 1) * n);
+
+	/*
+	 * The message's filter contains the message's properties;
+	 * the match's mask contains the properties to look for in the
+	 * message. Check the mask bit field against the filter bit field
+	 * to see whether the message possibly carries the properties the
+	 * connection has subscribed to.
+	 */
+	for (i = 0; i < n; i++)
+		if ((filter->data[i] & m[i]) != m[i])
+			return false;
+
+	return true;
+}
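+
+/*
+ * Worked example of the bit test above (values arbitrary): with a mask
+ * word m[i] = 0x3, a filter word 0xb matches since (0xb & 0x3) == 0x3,
+ * while a filter word 0x9 does not since (0x9 & 0x3) == 0x1. A match thus
+ * requires every bit the subscriber asked for to be set in the message's
+ * bloom filter.
+ */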
+
+static bool kdbus_match_rule_conn(const struct kdbus_match_rule *r,
+				  struct kdbus_conn *c,
+				  const struct kdbus_staging *s)
+{
+	lockdep_assert_held(&c->ep->bus->name_registry->rwlock);
+
+	switch (r->type) {
+	case KDBUS_ITEM_BLOOM_MASK:
+		return kdbus_match_bloom(s->bloom_filter, &r->bloom_mask, c);
+	case KDBUS_ITEM_ID:
+		return r->src_id == c->id || r->src_id == KDBUS_MATCH_ID_ANY;
+	case KDBUS_ITEM_DST_ID:
+		return r->dst_id == s->msg->dst_id ||
+		       r->dst_id == KDBUS_MATCH_ID_ANY;
+	case KDBUS_ITEM_NAME:
+		return kdbus_conn_has_name(c, r->name);
+	default:
+		return false;
+	}
+}
+
+static bool kdbus_match_rule_kernel(const struct kdbus_match_rule *r,
+				    const struct kdbus_staging *s)
+{
+	struct kdbus_item *n = s->notify;
+
+	if (WARN_ON(!n) || n->type != r->type)
+		return false;
+
+	switch (r->type) {
+	case KDBUS_ITEM_ID_ADD:
+		return r->new_id == KDBUS_MATCH_ID_ANY ||
+		       r->new_id == n->id_change.id;
+	case KDBUS_ITEM_ID_REMOVE:
+		return r->old_id == KDBUS_MATCH_ID_ANY ||
+		       r->old_id == n->id_change.id;
+	case KDBUS_ITEM_NAME_ADD:
+	case KDBUS_ITEM_NAME_CHANGE:
+	case KDBUS_ITEM_NAME_REMOVE:
+		return (r->old_id == KDBUS_MATCH_ID_ANY ||
+		        r->old_id == n->name_change.old_id.id) &&
+		       (r->new_id == KDBUS_MATCH_ID_ANY ||
+		        r->new_id == n->name_change.new_id.id) &&
+		       (!r->name || !strcmp(r->name, n->name_change.name));
+	default:
+		return false;
+	}
+}
+
+static bool kdbus_match_rules(const struct kdbus_match_entry *entry,
+			      struct kdbus_conn *c,
+			      const struct kdbus_staging *s)
+{
+	struct kdbus_match_rule *r;
+
+	list_for_each_entry(r, &entry->rules_list, rules_entry)
+		if ((c && !kdbus_match_rule_conn(r, c, s)) ||
+		    (!c && !kdbus_match_rule_kernel(r, s)))
+			return false;
+
+	return true;
+}
+
+/**
+ * kdbus_match_db_match_msg() - match a msg object against the database entries
+ * @mdb:		The match database
+ * @conn_src:		The connection object originating the message
+ * @staging:		Staging object containing the message to match against
+ *
+ * This function will walk through all the database entries previously uploaded
+ * with kdbus_match_db_add(). As soon as any of them has an all-satisfied rule
+ * set, this function will return true.
+ *
+ * The caller must hold the registry lock of conn_src->ep->bus if conn_src
+ * is non-NULL.
+ *
+ * Return: true if there was a matching database entry, false otherwise.
+ */
+bool kdbus_match_db_match_msg(struct kdbus_match_db *mdb,
+			      struct kdbus_conn *conn_src,
+			      const struct kdbus_staging *staging)
+{
+	struct kdbus_match_entry *entry;
+	bool matched = false;
+
+	down_read(&mdb->mdb_rwlock);
+	list_for_each_entry(entry, &mdb->entries_list, list_entry) {
+		matched = kdbus_match_rules(entry, conn_src, staging);
+		if (matched)
+			break;
+	}
+	up_read(&mdb->mdb_rwlock);
+
+	return matched;
+}
+
+static int kdbus_match_db_remove_unlocked(struct kdbus_match_db *mdb,
+					  u64 cookie)
+{
+	struct kdbus_match_entry *entry, *tmp;
+	bool found = false;
+
+	list_for_each_entry_safe(entry, tmp, &mdb->entries_list, list_entry)
+		if (entry->cookie == cookie) {
+			kdbus_match_entry_free(entry);
+			--mdb->entries_count;
+			found = true;
+		}
+
+	return found ? 0 : -EBADSLT;
+}
+
+/**
+ * kdbus_cmd_match_add() - handle KDBUS_CMD_MATCH_ADD
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * One call to this function (or one ioctl(KDBUS_CMD_MATCH_ADD), respectively)
+ * adds one new database entry with n rules attached to it. Each rule is
+ * described with a kdbus_item, and an entry is considered matching if all
+ * its rules are satisfied.
+ *
+ * The items attached to a kdbus_cmd_match struct have the following mapping:
+ *
+ * KDBUS_ITEM_BLOOM_MASK:	A bloom mask
+ * KDBUS_ITEM_NAME:		A connection's source name
+ * KDBUS_ITEM_ID:		A connection ID
+ * KDBUS_ITEM_DST_ID:		A connection ID
+ * KDBUS_ITEM_NAME_ADD:
+ * KDBUS_ITEM_NAME_REMOVE:
+ * KDBUS_ITEM_NAME_CHANGE:	Well-known name changes, carry
+ *				kdbus_notify_name_change
+ * KDBUS_ITEM_ID_ADD:
+ * KDBUS_ITEM_ID_REMOVE:	Connection ID changes, carry
+ *				kdbus_notify_id_change
+ *
+ * For kdbus_notify_{id,name}_change structs, only the ID and name fields
+ * are looked at when adding an entry. The flags are unused.
+ *
+ * Also note that KDBUS_ITEM_BLOOM_MASK, KDBUS_ITEM_NAME, KDBUS_ITEM_ID,
+ * and KDBUS_ITEM_DST_ID are used to match messages from userspace, while the
+ * others apply to kernel-generated notifications.
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_match_db *mdb = conn->match_db;
+	struct kdbus_match_entry *entry = NULL;
+	struct kdbus_cmd_match *cmd;
+	struct kdbus_item *item;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_BLOOM_MASK, .multiple = true },
+		{ .type = KDBUS_ITEM_NAME, .multiple = true },
+		{ .type = KDBUS_ITEM_ID, .multiple = true },
+		{ .type = KDBUS_ITEM_DST_ID, .multiple = true },
+		{ .type = KDBUS_ITEM_NAME_ADD, .multiple = true },
+		{ .type = KDBUS_ITEM_NAME_REMOVE, .multiple = true },
+		{ .type = KDBUS_ITEM_NAME_CHANGE, .multiple = true },
+		{ .type = KDBUS_ITEM_ID_ADD, .multiple = true },
+		{ .type = KDBUS_ITEM_ID_REMOVE, .multiple = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_MATCH_REPLACE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	entry->cookie = cmd->cookie;
+	INIT_LIST_HEAD(&entry->list_entry);
+	INIT_LIST_HEAD(&entry->rules_list);
+
+	KDBUS_ITEMS_FOREACH(item, cmd->items, KDBUS_ITEMS_SIZE(cmd, items)) {
+		struct kdbus_match_rule *rule;
+		size_t size = item->size - offsetof(struct kdbus_item, data);
+
+		rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+		if (!rule) {
+			ret = -ENOMEM;
+			goto exit;
+		}
+
+		rule->type = item->type;
+		INIT_LIST_HEAD(&rule->rules_entry);
+
+		switch (item->type) {
+		case KDBUS_ITEM_BLOOM_MASK: {
+			u64 bsize = conn->ep->bus->bloom.size;
+			u64 generations;
+			u64 remainder;
+
+			generations = div64_u64_rem(size, bsize, &remainder);
+			if (size < bsize || remainder > 0) {
+				ret = -EDOM;
+				break;
+			}
+
+			rule->bloom_mask.data = kmemdup(item->data,
+							size, GFP_KERNEL);
+			if (!rule->bloom_mask.data) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			rule->bloom_mask.generations = generations;
+			break;
+		}
+
+		case KDBUS_ITEM_NAME:
+			if (!kdbus_name_is_valid(item->str, false)) {
+				ret = -EINVAL;
+				break;
+			}
+
+			rule->name = kstrdup(item->str, GFP_KERNEL);
+			if (!rule->name)
+				ret = -ENOMEM;
+
+			break;
+
+		case KDBUS_ITEM_ID:
+			rule->src_id = item->id;
+			break;
+
+		case KDBUS_ITEM_DST_ID:
+			rule->dst_id = item->id;
+			break;
+
+		case KDBUS_ITEM_NAME_ADD:
+		case KDBUS_ITEM_NAME_REMOVE:
+		case KDBUS_ITEM_NAME_CHANGE:
+			rule->old_id = item->name_change.old_id.id;
+			rule->new_id = item->name_change.new_id.id;
+
+			if (size > sizeof(struct kdbus_notify_name_change)) {
+				rule->name = kstrdup(item->name_change.name,
+						     GFP_KERNEL);
+				if (!rule->name)
+					ret = -ENOMEM;
+			}
+
+			break;
+
+		case KDBUS_ITEM_ID_ADD:
+		case KDBUS_ITEM_ID_REMOVE:
+			if (item->type == KDBUS_ITEM_ID_ADD)
+				rule->new_id = item->id_change.id;
+			else
+				rule->old_id = item->id_change.id;
+
+			break;
+		}
+
+		if (ret < 0) {
+			kdbus_match_rule_free(rule);
+			goto exit;
+		}
+
+		list_add_tail(&rule->rules_entry, &entry->rules_list);
+	}
+
+	down_write(&mdb->mdb_rwlock);
+
+	/* Remove any entry that has the same cookie as the current one. */
+	if (cmd->flags & KDBUS_MATCH_REPLACE)
+		kdbus_match_db_remove_unlocked(mdb, entry->cookie);
+
+	/*
+	 * If the above removal caught any entry, there will be room for the
+	 * new one.
+	 */
+	if (++mdb->entries_count > KDBUS_MATCH_MAX) {
+		--mdb->entries_count;
+		ret = -EMFILE;
+	} else {
+		list_add_tail(&entry->list_entry, &mdb->entries_list);
+		entry = NULL;
+	}
+
+	up_write(&mdb->mdb_rwlock);
+
+exit:
+	kdbus_match_entry_free(entry);
+	return kdbus_args_clear(&args, ret);
+}
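+
+/*
+ * Illustrative user-space sketch (buffer sizing is hypothetical; struct
+ * kdbus_cmd_match and struct kdbus_item are declared in uapi/linux/kdbus.h,
+ * which is not part of this file): install a match for messages sent by the
+ * current owner of a well-known name. The 32-byte item buffer covers the
+ * 16-byte item header plus the 16-byte name including its terminator, which
+ * is already 8-byte aligned:
+ *
+ *	const char name[] = "org.example.foo";
+ *	struct {
+ *		struct kdbus_cmd_match cmd;
+ *		u8 buf[32];
+ *	} m = {};
+ *	struct kdbus_item *item = (struct kdbus_item *)m.buf;
+ *
+ *	item->type = KDBUS_ITEM_NAME;
+ *	item->size = offsetof(struct kdbus_item, str) + sizeof(name);
+ *	memcpy(item->str, name, sizeof(name));
+ *
+ *	m.cmd.size = sizeof(m);
+ *	m.cmd.cookie = 1;
+ *	ioctl(conn_fd, KDBUS_CMD_MATCH_ADD, &m);
+ */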
+
+/**
+ * kdbus_cmd_match_remove() - handle KDBUS_CMD_MATCH_REMOVE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_match_remove(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_cmd_match *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	down_write(&conn->match_db->mdb_rwlock);
+	ret = kdbus_match_db_remove_unlocked(conn->match_db, cmd->cookie);
+	up_write(&conn->match_db->mdb_rwlock);
+
+	return kdbus_args_clear(&args, ret);
+}
diff -Nur linux-4.2/ipc/kdbus/match.h linux-4.2-pck/ipc/kdbus/match.h
--- linux-4.2/ipc/kdbus/match.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/match.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_MATCH_H
+#define __KDBUS_MATCH_H
+
+struct kdbus_conn;
+struct kdbus_match_db;
+struct kdbus_staging;
+
+struct kdbus_match_db *kdbus_match_db_new(void);
+void kdbus_match_db_free(struct kdbus_match_db *db);
+int kdbus_match_db_add(struct kdbus_conn *conn,
+		       struct kdbus_cmd_match *cmd);
+int kdbus_match_db_remove(struct kdbus_conn *conn,
+			  struct kdbus_cmd_match *cmd);
+bool kdbus_match_db_match_msg(struct kdbus_match_db *db,
+			      struct kdbus_conn *conn_src,
+			      const struct kdbus_staging *staging);
+
+int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_match_remove(struct kdbus_conn *conn, void __user *argp);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/message.c linux-4.2-pck/ipc/kdbus/message.c
--- linux-4.2/ipc/kdbus/message.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/message.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,1040 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/cgroup.h>
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/shmem_fs.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <net/sock.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "item.h"
+#include "match.h"
+#include "message.h"
+#include "names.h"
+#include "policy.h"
+
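+/* zero-filled scratch buffer used as source for 8-byte alignment padding */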
+static const char * const zeros = "\0\0\0\0\0\0\0";
+
+static struct kdbus_gaps *kdbus_gaps_new(size_t n_memfds, size_t n_fds)
+{
+	size_t size_offsets, size_memfds, size_fds, size;
+	struct kdbus_gaps *gaps;
+
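+	/*
+	 * Single allocation: the gaps object is followed by the offset array
+	 * and the memfd/fd file-pointer arrays.
+	 */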
+	size_offsets = n_memfds * sizeof(*gaps->memfd_offsets);
+	size_memfds = n_memfds * sizeof(*gaps->memfd_files);
+	size_fds = n_fds * sizeof(*gaps->fd_files);
+	size = sizeof(*gaps) + size_offsets + size_memfds + size_fds;
+
+	gaps = kzalloc(size, GFP_KERNEL);
+	if (!gaps)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&gaps->kref);
+	gaps->n_memfds = 0; /* we reserve n_memfds, but don't enforce them */
+	gaps->memfd_offsets = (void *)(gaps + 1);
+	gaps->memfd_files = (void *)((u8 *)gaps->memfd_offsets + size_offsets);
+	gaps->n_fds = 0; /* we reserve n_fds, but don't enforce them */
+	gaps->fd_files = (void *)((u8 *)gaps->memfd_files + size_memfds);
+
+	return gaps;
+}
+
+static void kdbus_gaps_free(struct kref *kref)
+{
+	struct kdbus_gaps *gaps = container_of(kref, struct kdbus_gaps, kref);
+	size_t i;
+
+	for (i = 0; i < gaps->n_fds; ++i)
+		if (gaps->fd_files[i])
+			fput(gaps->fd_files[i]);
+	for (i = 0; i < gaps->n_memfds; ++i)
+		if (gaps->memfd_files[i])
+			fput(gaps->memfd_files[i]);
+
+	kfree(gaps);
+}
+
+/**
+ * kdbus_gaps_ref() - gain reference
+ * @gaps:	gaps object
+ *
+ * Return: @gaps is returned
+ */
+struct kdbus_gaps *kdbus_gaps_ref(struct kdbus_gaps *gaps)
+{
+	if (gaps)
+		kref_get(&gaps->kref);
+	return gaps;
+}
+
+/**
+ * kdbus_gaps_unref() - drop reference
+ * @gaps:	gaps object
+ *
+ * Return: NULL
+ */
+struct kdbus_gaps *kdbus_gaps_unref(struct kdbus_gaps *gaps)
+{
+	if (gaps)
+		kref_put(&gaps->kref, kdbus_gaps_free);
+	return NULL;
+}
+
+/**
+ * kdbus_gaps_install() - install file-descriptors
+ * @gaps:		gaps object, or NULL
+ * @slice:		pool slice that contains the message
+ * @out_incomplete:	output variable to note incomplete fds
+ *
+ * This function installs all file-descriptors of @gaps into the current
+ * process and copies the file-descriptor numbers into the target pool slice.
+ *
+ * If the file-descriptors were only partially installed, then @out_incomplete
+ * will be set to true. Otherwise, it's set to false.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int kdbus_gaps_install(struct kdbus_gaps *gaps, struct kdbus_pool_slice *slice,
+		       bool *out_incomplete)
+{
+	bool incomplete_fds = false;
+	struct kvec kvec;
+	size_t i, n_fds;
+	int ret, *fds;
+
+	if (!gaps) {
+		/* nothing to do */
+		*out_incomplete = incomplete_fds;
+		return 0;
+	}
+
+	n_fds = gaps->n_fds + gaps->n_memfds;
+	if (n_fds < 1) {
+		/* nothing to do */
+		*out_incomplete = incomplete_fds;
+		return 0;
+	}
+
+	fds = kmalloc_array(n_fds, sizeof(*fds), GFP_TEMPORARY);
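+	/* n_fds is reused below as the number of entries written to fds[] */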
+	n_fds = 0;
+	if (!fds)
+		return -ENOMEM;
+
+	/* 1) allocate fds and copy them over */
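+	/*
+	 * Reserve fd numbers and copy them into the slice first; the files
+	 * are installed only once nothing can fail anymore, since installed
+	 * fds cannot be taken back.
+	 */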
+
+	if (gaps->n_fds > 0) {
+		for (i = 0; i < gaps->n_fds; ++i) {
+			int fd;
+
+			fd = get_unused_fd_flags(O_CLOEXEC);
+			if (fd < 0)
+				incomplete_fds = true;
+
+			WARN_ON(!gaps->fd_files[i]);
+
+			fds[n_fds++] = fd < 0 ? -1 : fd;
+		}
+
+		/*
+		 * The file-descriptor array can only be present once per
+		 * message. Hence, prepare all fds and then copy them over with
+		 * a single kvec.
+		 */
+
+		WARN_ON(!gaps->fd_offset);
+
+		kvec.iov_base = fds;
+		kvec.iov_len = gaps->n_fds * sizeof(*fds);
+		ret = kdbus_pool_slice_copy_kvec(slice, gaps->fd_offset,
+						 &kvec, 1, kvec.iov_len);
+		if (ret < 0)
+			goto exit;
+	}
+
+	for (i = 0; i < gaps->n_memfds; ++i) {
+		int memfd;
+
+		memfd = get_unused_fd_flags(O_CLOEXEC);
+		if (memfd < 0) {
+			incomplete_fds = true;
+			/* memfds are initialized to -1, so skip copying them */
+			continue;
+		}
+
+		fds[n_fds++] = memfd;
+
+		/*
+		 * memfds have to be copied individually as they each are put
+		 * into a separate item. This should not be an issue, though,
+		 * as usually there is no need to send more than one memfd per
+		 * message.
+		 */
+
+		WARN_ON(!gaps->memfd_offsets[i]);
+		WARN_ON(!gaps->memfd_files[i]);
+
+		kvec.iov_base = &memfd;
+		kvec.iov_len = sizeof(memfd);
+		ret = kdbus_pool_slice_copy_kvec(slice, gaps->memfd_offsets[i],
+						 &kvec, 1, kvec.iov_len);
+		if (ret < 0)
+			goto exit;
+	}
+
+	/* 2) install fds now that everything was successful */
+
+	for (i = 0; i < gaps->n_fds; ++i)
+		if (fds[i] >= 0)
+			fd_install(fds[i], get_file(gaps->fd_files[i]));
+	for (i = 0; i < gaps->n_memfds; ++i)
+		if (fds[gaps->n_fds + i] >= 0)
+			fd_install(fds[gaps->n_fds + i],
+				   get_file(gaps->memfd_files[i]));
+
+	ret = 0;
+
+exit:
+	if (ret < 0)
+		for (i = 0; i < n_fds; ++i)
+			put_unused_fd(fds[i]);
+	kfree(fds);
+	*out_incomplete = incomplete_fds;
+	return ret;
+}
+
+static struct file *kdbus_get_fd(int fd)
+{
+	struct file *f, *ret;
+	struct inode *inode;
+	struct socket *sock;
+
+	if (fd < 0)
+		return ERR_PTR(-EBADF);
+
+	f = fget_raw(fd);
+	if (!f)
+		return ERR_PTR(-EBADF);
+
+	inode = file_inode(f);
+	sock = S_ISSOCK(inode->i_mode) ? SOCKET_I(inode) : NULL;
+
+	if (f->f_mode & FMODE_PATH)
+		ret = f; /* O_PATH is always allowed */
+	else if (f->f_op == &kdbus_handle_ops)
+		ret = ERR_PTR(-EOPNOTSUPP); /* disallow kdbus-fd over kdbus */
+	else if (sock && sock->sk && sock->ops && sock->ops->family == PF_UNIX)
+		ret = ERR_PTR(-EOPNOTSUPP); /* disallow UDS over kdbus */
+	else
+		ret = f; /* all others are allowed */
+
+	if (f != ret)
+		fput(f);
+
+	return ret;
+}
+
+static struct file *kdbus_get_memfd(const struct kdbus_memfd *memfd)
+{
+	const int m = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL;
+	struct file *f, *ret;
+	int s;
+
+	if (memfd->fd < 0)
+		return ERR_PTR(-EBADF);
+
+	f = fget(memfd->fd);
+	if (!f)
+		return ERR_PTR(-EBADF);
+
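+	/*
+	 * Only fully sealed memfds are accepted, so their content cannot
+	 * change while the message is queued.
+	 */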
+	s = shmem_get_seals(f);
+	if (s < 0)
+		ret = ERR_PTR(-EMEDIUMTYPE);
+	else if ((s & m) != m)
+		ret = ERR_PTR(-ETXTBSY);
+	else if (memfd->start + memfd->size > (u64)i_size_read(file_inode(f)))
+		ret = ERR_PTR(-EFAULT);
+	else
+		ret = f;
+
+	if (f != ret)
+		fput(f);
+
+	return ret;
+}
+
+static int kdbus_msg_examine(struct kdbus_msg *msg, struct kdbus_bus *bus,
+			     struct kdbus_cmd_send *cmd, size_t *out_n_memfds,
+			     size_t *out_n_fds, size_t *out_n_parts)
+{
+	struct kdbus_item *item, *fds = NULL, *bloom = NULL, *dstname = NULL;
+	u64 n_parts, n_memfds, n_fds, vec_size;
+
+	/*
+	 * Step 1:
+	 * Validate the message and command parameters.
+	 */
+
+	/* KDBUS_PAYLOAD_KERNEL is reserved for kernel messages */
+	if (msg->payload_type == KDBUS_PAYLOAD_KERNEL)
+		return -EINVAL;
+
+	if (msg->dst_id == KDBUS_DST_ID_BROADCAST) {
+		/* broadcasts must be marked as signals */
+		if (!(msg->flags & KDBUS_MSG_SIGNAL))
+			return -EBADMSG;
+		/* broadcasts cannot have timeouts */
+		if (msg->timeout_ns > 0)
+			return -ENOTUNIQ;
+	}
+
+	if (msg->flags & KDBUS_MSG_EXPECT_REPLY) {
+		/* if you expect a reply, you must specify a timeout */
+		if (msg->timeout_ns == 0)
+			return -EINVAL;
+		/* signals cannot have replies */
+		if (msg->flags & KDBUS_MSG_SIGNAL)
+			return -ENOTUNIQ;
+	} else {
+		/* must expect reply if sent as synchronous call */
+		if (cmd->flags & KDBUS_SEND_SYNC_REPLY)
+			return -EINVAL;
+		/* cannot mark replies as signal */
+		if (msg->cookie_reply && (msg->flags & KDBUS_MSG_SIGNAL))
+			return -EINVAL;
+	}
+
+	/*
+	 * Step 2:
+	 * Validate all passed items. While at it, select some statistics that
+	 * are required to allocate state objects later on.
+	 *
+	 * Generic item validation has already been done via
+	 * kdbus_item_validate(). Furthermore, the number of items is naturally
+	 * limited by the maximum message size. Hence, only non-generic item
+	 * checks are performed here (mainly integer overflow tests).
+	 */
+
+	n_parts = 0;
+	n_memfds = 0;
+	n_fds = 0;
+	vec_size = 0;
+
+	KDBUS_ITEMS_FOREACH(item, msg->items, KDBUS_ITEMS_SIZE(msg, items)) {
+		switch (item->type) {
+		case KDBUS_ITEM_PAYLOAD_VEC: {
+			void __force __user *ptr = KDBUS_PTR(item->vec.address);
+			u64 size = item->vec.size;
+
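+			/* guard against u64 overflow when summing vec sizes */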
+			if (vec_size + size < vec_size)
+				return -EMSGSIZE;
+			if (vec_size + size > KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE)
+				return -EMSGSIZE;
+			if (ptr && unlikely(!access_ok(VERIFY_READ, ptr, size)))
+				return -EFAULT;
+
+			if (ptr || size % 8) /* data or padding */
+				++n_parts;
+			break;
+		}
+		case KDBUS_ITEM_PAYLOAD_MEMFD: {
+			u64 start = item->memfd.start;
+			u64 size = item->memfd.size;
+
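+			/* guard against u64 overflow of the memfd range */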
+			if (start + size < start)
+				return -EMSGSIZE;
+			if (n_memfds >= KDBUS_MSG_MAX_MEMFD_ITEMS)
+				return -E2BIG;
+
+			++n_memfds;
+			if (size % 8) /* vec-padding required */
+				++n_parts;
+			break;
+		}
+		case KDBUS_ITEM_FDS: {
+			if (fds)
+				return -EEXIST;
+
+			fds = item;
+			n_fds = KDBUS_ITEM_PAYLOAD_SIZE(item) / sizeof(int);
+			if (n_fds > KDBUS_CONN_MAX_FDS_PER_USER)
+				return -EMFILE;
+
+			break;
+		}
+		case KDBUS_ITEM_BLOOM_FILTER: {
+			u64 bloom_size;
+
+			if (bloom)
+				return -EEXIST;
+
+			bloom = item;
+			bloom_size = KDBUS_ITEM_PAYLOAD_SIZE(item) -
+				     offsetof(struct kdbus_bloom_filter, data);
+			if (!KDBUS_IS_ALIGNED8(bloom_size))
+				return -EFAULT;
+			if (bloom_size != bus->bloom.size)
+				return -EDOM;
+
+			break;
+		}
+		case KDBUS_ITEM_DST_NAME: {
+			if (dstname)
+				return -EEXIST;
+
+			dstname = item;
+			if (!kdbus_name_is_valid(item->str, false))
+				return -EINVAL;
+			if (msg->dst_id == KDBUS_DST_ID_BROADCAST)
+				return -EBADMSG;
+
+			break;
+		}
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Step 3:
+	 * Validate that required items were actually passed, and that no item
+	 * contradicts the message flags.
+	 */
+
+	/* bloom filters must be attached _iff_ it's a signal */
+	if (!(msg->flags & KDBUS_MSG_SIGNAL) != !bloom)
+		return -EBADMSG;
+	/* destination name is required if no ID is given */
+	if (msg->dst_id == KDBUS_DST_ID_NAME && !dstname)
+		return -EDESTADDRREQ;
+	/* cannot send file-descriptors attached to broadcasts */
+	if (msg->dst_id == KDBUS_DST_ID_BROADCAST && fds)
+		return -ENOTUNIQ;
+
+	*out_n_memfds = n_memfds;
+	*out_n_fds = n_fds;
+	*out_n_parts = n_parts;
+
+	return 0;
+}
+
+static bool kdbus_staging_merge_vecs(struct kdbus_staging *staging,
+				     struct kdbus_item **prev_item,
+				     struct iovec **prev_vec,
+				     const struct kdbus_item *merge)
+{
+	void __user *ptr = (void __user *)KDBUS_PTR(merge->vec.address);
+	u64 padding = merge->vec.size % 8;
+	struct kdbus_item *prev = *prev_item;
+	struct iovec *vec = *prev_vec;
+
+	/* XXX: merging is disabled so far */
+	if (0 && prev && prev->type == KDBUS_ITEM_PAYLOAD_OFF &&
+	    !merge->vec.address == !prev->vec.address) {
+		/*
+		 * If we merge two VECs, we can always drop the second
+		 * PAYLOAD_VEC item. Hence, include its size in the previous
+		 * one.
+		 */
+		prev->vec.size += merge->vec.size;
+
+		if (ptr) {
+			/*
+			 * If we merge two data VECs, we need two iovecs to copy
+			 * the data. But the items can be easily merged by
+			 * summing their lengths.
+			 */
+			vec = &staging->parts[staging->n_parts++];
+			vec->iov_len = merge->vec.size;
+			vec->iov_base = ptr;
+			staging->n_payload += vec->iov_len;
+		} else if (padding) {
+			/*
+			 * If we merge two 0-vecs with the second 0-vec
+			 * requiring padding, we need to insert an iovec to copy
+			 * the 0-padding. We try merging it with the previous
+			 * 0-padding iovec. This might end up with an
+			 * iov_len==0, in which case we simply drop the iovec.
+			 */
+			if (vec) {
+				staging->n_payload -= vec->iov_len;
+				vec->iov_len = prev->vec.size % 8;
+				if (!vec->iov_len) {
+					--staging->n_parts;
+					vec = NULL;
+				} else {
+					staging->n_payload += vec->iov_len;
+				}
+			} else {
+				vec = &staging->parts[staging->n_parts++];
+				vec->iov_len = padding;
+				vec->iov_base = (char __user *)zeros;
+				staging->n_payload += vec->iov_len;
+			}
+		} else {
+			/*
+			 * If we merge two 0-vecs with the second 0-vec having
+			 * no padding, we know the padding of the first stays
+			 * the same. Hence, @vec needs no adjustment.
+			 */
+		}
+
+		/* successfully merged with previous item */
+		merge = prev;
+	} else {
+		/*
+		 * If we cannot merge the payload item with the previous one,
+		 * we simply insert a new iovec for the data/padding.
+		 */
+		if (ptr) {
+			vec = &staging->parts[staging->n_parts++];
+			vec->iov_len = merge->vec.size;
+			vec->iov_base = ptr;
+			staging->n_payload += vec->iov_len;
+		} else if (padding) {
+			vec = &staging->parts[staging->n_parts++];
+			vec->iov_len = padding;
+			vec->iov_base = (char __user *)zeros;
+			staging->n_payload += vec->iov_len;
+		} else {
+			vec = NULL;
+		}
+	}
+
+	*prev_item = (struct kdbus_item *)merge;
+	*prev_vec = vec;
+
+	return merge == prev;
+}
+
+static int kdbus_staging_import(struct kdbus_staging *staging)
+{
+	struct kdbus_item *it, *item, *last, *prev_payload;
+	struct kdbus_gaps *gaps = staging->gaps;
+	struct kdbus_msg *msg = staging->msg;
+	struct iovec *part, *prev_part;
+	bool drop_item;
+
+	drop_item = false;
+	last = NULL;
+	prev_payload = NULL;
+	prev_part = NULL;
+
+	/*
+	 * We modify msg->items along the way; make sure to use @item as offset
+	 * to the next item (instead of the iterator @it).
+	 */
+	for (it = item = msg->items;
+	     it >= msg->items &&
+	             (u8 *)it < (u8 *)msg + msg->size &&
+	             (u8 *)it + it->size <= (u8 *)msg + msg->size; ) {
+		/*
+		 * If we dropped items along the way, move current item to
+		 * front. We must not access @it afterwards, but use @item
+		 * instead!
+		 */
+		if (it != item)
+			memmove(item, it, it->size);
+		it = (void *)((u8 *)it + KDBUS_ALIGN8(item->size));
+
+		switch (item->type) {
+		case KDBUS_ITEM_PAYLOAD_VEC: {
+			size_t offset = staging->n_payload;
+
+			if (kdbus_staging_merge_vecs(staging, &prev_payload,
+						     &prev_part, item)) {
+				drop_item = true;
+			} else if (item->vec.address) {
+				/* real offset is patched later on */
+				item->type = KDBUS_ITEM_PAYLOAD_OFF;
+				item->vec.offset = offset;
+			} else {
+				item->type = KDBUS_ITEM_PAYLOAD_OFF;
+				item->vec.offset = ~0ULL;
+			}
+
+			break;
+		}
+		case KDBUS_ITEM_PAYLOAD_MEMFD: {
+			struct file *f;
+
+			f = kdbus_get_memfd(&item->memfd);
+			if (IS_ERR(f))
+				return PTR_ERR(f);
+
+			gaps->memfd_files[gaps->n_memfds] = f;
+			gaps->memfd_offsets[gaps->n_memfds] =
+					(u8 *)&item->memfd.fd - (u8 *)msg;
+			++gaps->n_memfds;
+
+			/* memfds cannot be merged */
+			prev_payload = item;
+			prev_part = NULL;
+
+			/* insert padding to make following VECs aligned */
+			if (item->memfd.size % 8) {
+				part = &staging->parts[staging->n_parts++];
+				part->iov_len = item->memfd.size % 8;
+				part->iov_base = (char __user *)zeros;
+				staging->n_payload += part->iov_len;
+			}
+
+			break;
+		}
+		case KDBUS_ITEM_FDS: {
+			size_t i, n_fds;
+
+			n_fds = KDBUS_ITEM_PAYLOAD_SIZE(item) / sizeof(int);
+			for (i = 0; i < n_fds; ++i) {
+				struct file *f;
+
+				f = kdbus_get_fd(item->fds[i]);
+				if (IS_ERR(f))
+					return PTR_ERR(f);
+
+				gaps->fd_files[gaps->n_fds++] = f;
+			}
+
+			gaps->fd_offset = (u8 *)item->fds - (u8 *)msg;
+
+			break;
+		}
+		case KDBUS_ITEM_BLOOM_FILTER:
+			staging->bloom_filter = &item->bloom_filter;
+			break;
+		case KDBUS_ITEM_DST_NAME:
+			staging->dst_name = item->str;
+			break;
+		}
+
+		/* drop item if we merged it with a previous one */
+		if (drop_item) {
+			drop_item = false;
+		} else {
+			last = item;
+			item = KDBUS_ITEM_NEXT(item);
+		}
+	}
+
+	/* adjust message size regarding dropped items */
+	msg->size = offsetof(struct kdbus_msg, items);
+	if (last)
+		msg->size += ((u8 *)last - (u8 *)msg->items) + last->size;
+
+	return 0;
+}
+
+static void kdbus_staging_reserve(struct kdbus_staging *staging)
+{
+	struct iovec *part;
+
+	part = &staging->parts[staging->n_parts++];
+	part->iov_base = (void __user *)zeros;
+	part->iov_len = 0;
+}
+
+static struct kdbus_staging *kdbus_staging_new(struct kdbus_bus *bus,
+					       size_t n_parts,
+					       size_t msg_extra_size)
+{
+	const size_t reserved_parts = 5; /* see below for explanation */
+	struct kdbus_staging *staging;
+	int ret;
+
+	n_parts += reserved_parts;
+
+	staging = kzalloc(sizeof(*staging) + n_parts * sizeof(*staging->parts) +
+			  msg_extra_size, GFP_TEMPORARY);
+	if (!staging)
+		return ERR_PTR(-ENOMEM);
+
+	staging->msg_seqnum = atomic64_inc_return(&bus->last_message_id);
+	staging->n_parts = 0; /* we reserve n_parts, but don't enforce them */
+	staging->parts = (void *)(staging + 1);
+
+	if (msg_extra_size) /* if requested, allocate message, too */
+		staging->msg = (void *)((u8 *)staging->parts +
+				        n_parts * sizeof(*staging->parts));
+
+	staging->meta_proc = kdbus_meta_proc_new();
+	if (IS_ERR(staging->meta_proc)) {
+		ret = PTR_ERR(staging->meta_proc);
+		staging->meta_proc = NULL;
+		goto error;
+	}
+
+	staging->meta_conn = kdbus_meta_conn_new();
+	if (IS_ERR(staging->meta_conn)) {
+		ret = PTR_ERR(staging->meta_conn);
+		staging->meta_conn = NULL;
+		goto error;
+	}
+
+	/*
+	 * Prepare iovecs to copy the message into the target pool. We use the
+	 * following iovecs:
+	 *   * iovec to copy "kdbus_msg.size"
+	 *   * iovec to copy "struct kdbus_msg" (minus size) plus items
+	 *   * iovec for possible padding after the items
+	 *   * iovec for metadata items
+	 *   * iovec for possible padding after the metadata items
+	 *
+	 * Make sure to update @reserved_parts if you add more parts here.
+	 */
+
+	kdbus_staging_reserve(staging); /* msg.size */
+	kdbus_staging_reserve(staging); /* msg (minus msg.size) plus items */
+	kdbus_staging_reserve(staging); /* msg padding */
+	kdbus_staging_reserve(staging); /* meta */
+	kdbus_staging_reserve(staging); /* meta padding */
+
+	return staging;
+
+error:
+	kdbus_staging_free(staging);
+	return ERR_PTR(ret);
+}
+
+struct kdbus_staging *kdbus_staging_new_kernel(struct kdbus_bus *bus,
+					       u64 dst, u64 cookie_timeout,
+					       size_t it_size, size_t it_type)
+{
+	struct kdbus_staging *staging;
+	size_t size;
+
+	size = offsetof(struct kdbus_msg, items) +
+	       KDBUS_ITEM_HEADER_SIZE + it_size;
+
+	staging = kdbus_staging_new(bus, 0, KDBUS_ALIGN8(size));
+	if (IS_ERR(staging))
+		return ERR_CAST(staging);
+
+	staging->msg->size = size;
+	staging->msg->flags = (dst == KDBUS_DST_ID_BROADCAST) ?
+							KDBUS_MSG_SIGNAL : 0;
+	staging->msg->dst_id = dst;
+	staging->msg->src_id = KDBUS_SRC_ID_KERNEL;
+	staging->msg->payload_type = KDBUS_PAYLOAD_KERNEL;
+	staging->msg->cookie_reply = cookie_timeout;
+	staging->notify = staging->msg->items;
+	staging->notify->size = KDBUS_ITEM_HEADER_SIZE + it_size;
+	staging->notify->type = it_type;
+
+	return staging;
+}
+
+struct kdbus_staging *kdbus_staging_new_user(struct kdbus_bus *bus,
+					     struct kdbus_cmd_send *cmd,
+					     struct kdbus_msg *msg)
+{
+	const size_t reserved_parts = 1; /* see below for explanation */
+	size_t n_memfds, n_fds, n_parts;
+	struct kdbus_staging *staging;
+	int ret;
+
+	/*
+	 * Examine user-supplied message and figure out how many resources we
+	 * need to allocate in our staging area. This requires us to iterate
+	 * the message twice, but saves us from re-allocating our resources
+	 * all the time.
+	 */
+
+	ret = kdbus_msg_examine(msg, bus, cmd, &n_memfds, &n_fds, &n_parts);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	n_parts += reserved_parts;
+
+	/*
+	 * Allocate staging area with the number of required resources. Make
+	 * sure that we have enough iovecs for all required parts pre-allocated
+	 * so this will hopefully be the only memory allocation for this
+	 * message transaction.
+	 */
+
+	staging = kdbus_staging_new(bus, n_parts, 0);
+	if (IS_ERR(staging))
+		return ERR_CAST(staging);
+
+	staging->msg = msg;
+
+	/*
+	 * If the message contains memfds or fd items, we need to remember some
+	 * state so we can fill in the requested information at RECV time.
+	 * File-descriptors cannot be installed in the receiver at SEND time.
+	 * Hence, allocate a gaps-object to remember that state. That gaps
+	 * object is linked to from the staging area, but will also be linked
+	 * to from the message queue of each peer. Hence, each receiver owns a
+	 * reference to it, and it will later be used to fill the 'gaps' in the
+	 * message that couldn't be filled at SEND time.
+	 * Note that the 'gaps' object is read-only once the staging-allocator
+	 * returns. There might be connections receiving a queued message while
+	 * the sender still broadcasts the message to other receivers.
+	 */
+
+	if (n_memfds > 0 || n_fds > 0) {
+		staging->gaps = kdbus_gaps_new(n_memfds, n_fds);
+		if (IS_ERR(staging->gaps)) {
+			ret = PTR_ERR(staging->gaps);
+			staging->gaps = NULL;
+			kdbus_staging_free(staging);
+			return ERR_PTR(ret);
+		}
+	}
+
+	/*
+	 * kdbus_staging_new() already reserves parts for message setup. For
+	 * user-supplied messages, we add the following iovecs:
+	 *   ... variable number of iovecs for payload ...
+	 *   * final iovec for possible padding of payload
+	 *
+	 * Make sure to update @reserved_parts if you add more parts here.
+	 */
+
+	ret = kdbus_staging_import(staging); /* payload */
+	kdbus_staging_reserve(staging); /* payload padding */
+
+	if (ret < 0)
+		goto error;
+
+	return staging;
+
+error:
+	kdbus_staging_free(staging);
+	return ERR_PTR(ret);
+}
+
+struct kdbus_staging *kdbus_staging_free(struct kdbus_staging *staging)
+{
+	if (!staging)
+		return NULL;
+
+	kdbus_meta_conn_unref(staging->meta_conn);
+	kdbus_meta_proc_unref(staging->meta_proc);
+	kdbus_gaps_unref(staging->gaps);
+	kfree(staging);
+
+	return NULL;
+}
+
+static int kdbus_staging_collect_metadata(struct kdbus_staging *staging,
+					  struct kdbus_conn *src,
+					  struct kdbus_conn *dst,
+					  u64 *out_attach)
+{
+	u64 attach;
+	int ret;
+
+	if (src)
+		attach = kdbus_meta_msg_mask(src, dst);
+	else
+		attach = KDBUS_ATTACH_TIMESTAMP; /* metadata for kernel msgs */
+
+	if (src && !src->meta_fake) {
+		ret = kdbus_meta_proc_collect(staging->meta_proc, attach);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = kdbus_meta_conn_collect(staging->meta_conn, src,
+				      staging->msg_seqnum, attach);
+	if (ret < 0)
+		return ret;
+
+	*out_attach = attach;
+	return 0;
+}
+
+/**
+ * kdbus_staging_emit() - emit linearized message in target pool
+ * @staging:		staging object to create message from
+ * @src:		sender of the message (or NULL)
+ * @dst:		target connection to allocate message for
+ *
+ * This allocates a pool-slice for @dst and copies the message provided by
+ * @staging into it. The new slice is then returned to the caller for further
+ * processing. It's not linked into any queue, yet.
+ *
+ * Return: Newly allocated slice or ERR_PTR on failure.
+ */
+struct kdbus_pool_slice *kdbus_staging_emit(struct kdbus_staging *staging,
+					    struct kdbus_conn *src,
+					    struct kdbus_conn *dst)
+{
+	struct kdbus_item *item, *meta_items = NULL;
+	struct kdbus_pool_slice *slice = NULL;
+	size_t off, size, meta_size;
+	struct iovec *v;
+	u64 attach, msg_size;
+	int ret;
+
+	/*
+	 * Step 1:
+	 * Collect metadata from @src depending on the attach-flags allowed for
+	 * @dst. Translate it into the namespaces pinned by @dst.
+	 */
+
+	ret = kdbus_staging_collect_metadata(staging, src, dst, &attach);
+	if (ret < 0)
+		goto error;
+
+	ret = kdbus_meta_emit(staging->meta_proc, NULL, staging->meta_conn,
+			      dst, attach, &meta_items, &meta_size);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Step 2:
+	 * Setup iovecs for the message. See kdbus_staging_new() for allocation
+	 * of those iovecs. All reserved iovecs have been initialized with
+	 * iov_len=0 + iov_base=zeros. Furthermore, the iovecs to copy the
+	 * actual message payload have already been initialized and need not be
+	 * touched.
+	 */
+
+	v = staging->parts;
+	msg_size = staging->msg->size;
+
+	/* msg.size */
+	v->iov_len = sizeof(msg_size);
+	v->iov_base = (void __user *)&msg_size;
+	++v;
+
+	/* msg (after msg.size) plus items */
+	v->iov_len = staging->msg->size - sizeof(staging->msg->size);
+	v->iov_base = (void __user *)((u8 *)staging->msg +
+				      sizeof(staging->msg->size));
+	++v;
+
+	/* padding after msg */
+	v->iov_len = KDBUS_ALIGN8(staging->msg->size) - staging->msg->size;
+	v->iov_base = (void __user *)zeros;
+	++v;
+
+	if (meta_size > 0) {
+		/* metadata items */
+		v->iov_len = meta_size;
+		v->iov_base = (void __user *)meta_items;
+		++v;
+
+		/* padding after metadata */
+		v->iov_len = KDBUS_ALIGN8(meta_size) - meta_size;
+		v->iov_base = (void __user *)zeros;
+		++v;
+
+		msg_size = KDBUS_ALIGN8(msg_size) + meta_size;
+	} else {
+		/* metadata items */
+		v->iov_len = 0;
+		v->iov_base = (void __user *)zeros;
+		++v;
+
+		/* padding after metadata */
+		v->iov_len = 0;
+		v->iov_base = (void __user *)zeros;
+		++v;
+	}
+
+	/* ... payload iovecs are already filled in ... */
+
+	/* compute overall size and fill in padding after payload */
+	size = KDBUS_ALIGN8(msg_size);
+
+	if (staging->n_payload > 0) {
+		size += staging->n_payload;
+
+		v = &staging->parts[staging->n_parts - 1];
+		v->iov_len = KDBUS_ALIGN8(size) - size;
+		v->iov_base = (void __user *)zeros;
+
+		size = KDBUS_ALIGN8(size);
+	}
+
+	/*
+	 * Step 3:
+	 * The PAYLOAD_OFF items in the message contain a relative 'offset'
+	 * field that tells the receiver where to find the actual payload. This
+	 * offset is relative to the start of the message, and as such depends
+	 * on the size of the metadata items we inserted. This size is variable
+	 * and changes for each peer we send the message to. Hence, we remember
+	 * the last relative offset that was used to calculate the 'offset'
+	 * fields. For each message, we re-calculate it and patch all items, in
+	 * case it changed.
+	 */
+
+	off = KDBUS_ALIGN8(msg_size);
+
+	if (off != staging->i_payload) {
+		KDBUS_ITEMS_FOREACH(item, staging->msg->items,
+				    KDBUS_ITEMS_SIZE(staging->msg, items)) {
+			if (item->type != KDBUS_ITEM_PAYLOAD_OFF)
+				continue;
+
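+			/* rebase the offset from the old payload start to the new one */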
+			item->vec.offset -= staging->i_payload;
+			item->vec.offset += off;
+		}
+
+		staging->i_payload = off;
+	}
+
+	/*
+	 * Step 4:
+	 * Allocate pool slice and copy over all data. Make sure to properly
+	 * account on user quota.
+	 */
+
+	ret = kdbus_conn_quota_inc(dst, src ? src->user : NULL, size,
+				   staging->gaps ? staging->gaps->n_fds : 0);
+	if (ret < 0)
+		goto error;
+
+	slice = kdbus_pool_slice_alloc(dst->pool, size, true);
+	if (IS_ERR(slice)) {
+		ret = PTR_ERR(slice);
+		slice = NULL;
+		goto error;
+	}
+
+	WARN_ON(kdbus_pool_slice_size(slice) != size);
+
+	ret = kdbus_pool_slice_copy_iovec(slice, 0, staging->parts,
+					  staging->n_parts, size);
+	if (ret < 0)
+		goto error;
+
+	/* all done, return slice to caller */
+	goto exit;
+
+error:
+	if (slice)
+		kdbus_conn_quota_dec(dst, src ? src->user : NULL, size,
+				     staging->gaps ? staging->gaps->n_fds : 0);
+	kdbus_pool_slice_release(slice);
+	slice = ERR_PTR(ret);
+exit:
+	kfree(meta_items);
+	return slice;
+}
diff -Nur linux-4.2/ipc/kdbus/message.h linux-4.2-pck/ipc/kdbus/message.h
--- linux-4.2/ipc/kdbus/message.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/message.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_MESSAGE_H
+#define __KDBUS_MESSAGE_H
+
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <uapi/linux/kdbus.h>
+
+struct kdbus_bus;
+struct kdbus_conn;
+struct kdbus_meta_conn;
+struct kdbus_meta_proc;
+struct kdbus_pool_slice;
+
+/**
+ * struct kdbus_gaps - gaps in a message to be filled later
+ * @kref:		Reference counter
+ * @n_memfds:		Number of memfds
+ * @memfd_offsets:	Offsets of kdbus_memfd items in target slice
+ * @memfd_files:	Array of pinned files for the memfd items
+ * @n_fds:		Number of fds
+ * @fd_files:		Array of pinned files for the sent fds
+ * @fd_offset:		Offset of fd-array in target slice
+ *
+ * The 'gaps' object is used to track data that is needed to fill gaps in a
+ * message at RECV time. Usually, we try to compile the whole message at SEND
+ * time. This has the advantage that we don't have to cache any information and
+ * can keep the memory consumption small. Furthermore, all copy operations can
+ * be combined into a single function call, which speeds up transactions
+ * considerably.
+ * However, things like file-descriptors can only be fully installed at RECV
+ * time. The gaps object tracks this data and pins it until a message is
+ * received. The gaps object is shared between all receivers of the same
+ * message.
+ */
+struct kdbus_gaps {
+	struct kref kref;
+
+	/* state tracking for KDBUS_ITEM_PAYLOAD_MEMFD entries */
+	size_t n_memfds;
+	u64 *memfd_offsets;
+	struct file **memfd_files;
+
+	/* state tracking for KDBUS_ITEM_FDS */
+	size_t n_fds;
+	struct file **fd_files;
+	u64 fd_offset;
+};
+
+struct kdbus_gaps *kdbus_gaps_ref(struct kdbus_gaps *gaps);
+struct kdbus_gaps *kdbus_gaps_unref(struct kdbus_gaps *gaps);
+int kdbus_gaps_install(struct kdbus_gaps *gaps, struct kdbus_pool_slice *slice,
+		       bool *out_incomplete);
+
+/**
+ * struct kdbus_staging - staging area to import messages
+ * @msg:		User-supplied message
+ * @gaps:		Gaps-object created during import (or NULL if empty)
+ * @msg_seqnum:		Message sequence number
+ * @notify_entry:	Entry into list of kernel-generated notifications
+ * @i_payload:		Current relative index of start of payload
+ * @n_payload:		Total number of bytes needed for payload
+ * @n_parts:		Number of parts
+ * @parts:		Array of iovecs that make up the whole message
+ * @meta_proc:		Process metadata of the sender (or NULL if empty)
+ * @meta_conn:		Connection metadata of the sender (or NULL if empty)
+ * @bloom_filter:	Pointer to the bloom-item in @msg, or NULL
+ * @dst_name:		Pointer to the dst-name-item in @msg, or NULL
+ * @notify:		Pointer to the notification item in @msg, or NULL
+ *
+ * The kdbus_staging object is a temporary staging area to import user-supplied
+ * messages into the kernel. It is only used during SEND and dropped once the
+ * message is queued. Any data that cannot be collected during SEND is
+ * collected in a kdbus_gaps object and attached to the message queue.
+ */
+struct kdbus_staging {
+	struct kdbus_msg *msg;
+	struct kdbus_gaps *gaps;
+	u64 msg_seqnum;
+	struct list_head notify_entry;
+
+	/* crafted iovecs to copy the message */
+	size_t i_payload;
+	size_t n_payload;
+	size_t n_parts;
+	struct iovec *parts;
+
+	/* metadata state */
+	struct kdbus_meta_proc *meta_proc;
+	struct kdbus_meta_conn *meta_conn;
+
+	/* cached pointers into @msg */
+	const struct kdbus_bloom_filter *bloom_filter;
+	const char *dst_name;
+	struct kdbus_item *notify;
+};
+
+struct kdbus_staging *kdbus_staging_new_kernel(struct kdbus_bus *bus,
+					       u64 dst, u64 cookie_timeout,
+					       size_t it_size, size_t it_type);
+struct kdbus_staging *kdbus_staging_new_user(struct kdbus_bus *bus,
+					     struct kdbus_cmd_send *cmd,
+					     struct kdbus_msg *msg);
+struct kdbus_staging *kdbus_staging_free(struct kdbus_staging *staging);
+struct kdbus_pool_slice *kdbus_staging_emit(struct kdbus_staging *staging,
+					    struct kdbus_conn *src,
+					    struct kdbus_conn *dst);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/metadata.c linux-4.2-pck/ipc/kdbus/metadata.c
--- linux-4.2/ipc/kdbus/metadata.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/metadata.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,1347 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/audit.h>
+#include <linux/capability.h>
+#include <linux/cgroup.h>
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/fs_struct.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uidgid.h>
+#include <linux/uio.h>
+#include <linux/user_namespace.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "item.h"
+#include "message.h"
+#include "metadata.h"
+#include "names.h"
+
+/**
+ * struct kdbus_meta_proc - Process metadata
+ * @kref:		Reference counting
+ * @lock:		Object lock
+ * @collected:		Bitmask of collected items
+ * @valid:		Bitmask of collected and valid items
+ * @cred:		Credentials
+ * @pid:		PID of process
+ * @tgid:		TGID of process
+ * @ppid:		PPID of process
+ * @tid_comm:		TID comm line
+ * @pid_comm:		PID comm line
+ * @exe_path:		Executable path
+ * @root_path:		Root-FS path
+ * @cmdline:		Command-line
+ * @cgroup:		Full cgroup path
+ * @seclabel:		Seclabel
+ * @audit_loginuid:	Audit login-UID
+ * @audit_sessionid:	Audit session-ID
+ */
+struct kdbus_meta_proc {
+	struct kref kref;
+	struct mutex lock;
+	u64 collected;
+	u64 valid;
+
+	/* KDBUS_ITEM_CREDS */
+	/* KDBUS_ITEM_AUXGROUPS */
+	/* KDBUS_ITEM_CAPS */
+	const struct cred *cred;
+
+	/* KDBUS_ITEM_PIDS */
+	struct pid *pid;
+	struct pid *tgid;
+	struct pid *ppid;
+
+	/* KDBUS_ITEM_TID_COMM */
+	char tid_comm[TASK_COMM_LEN];
+	/* KDBUS_ITEM_PID_COMM */
+	char pid_comm[TASK_COMM_LEN];
+
+	/* KDBUS_ITEM_EXE */
+	struct path exe_path;
+	struct path root_path;
+
+	/* KDBUS_ITEM_CMDLINE */
+	char *cmdline;
+
+	/* KDBUS_ITEM_CGROUP */
+	char *cgroup;
+
+	/* KDBUS_ITEM_SECLABEL */
+	char *seclabel;
+
+	/* KDBUS_ITEM_AUDIT */
+	kuid_t audit_loginuid;
+	unsigned int audit_sessionid;
+};
+
+/**
+ * struct kdbus_meta_conn - Connection metadata
+ * @kref:		Reference counting
+ * @lock:		Object lock
+ * @collected:		Bitmask of collected items
+ * @valid:		Bitmask of collected and valid items
+ * @ts:			Timestamp values
+ * @owned_names_items:	Serialized items for owned names
+ * @owned_names_size:	Size of @owned_names_items
+ * @conn_description:	Connection description
+ */
+struct kdbus_meta_conn {
+	struct kref kref;
+	struct mutex lock;
+	u64 collected;
+	u64 valid;
+
+	/* KDBUS_ITEM_TIMESTAMP */
+	struct kdbus_timestamp ts;
+
+	/* KDBUS_ITEM_OWNED_NAME */
+	struct kdbus_item *owned_names_items;
+	size_t owned_names_size;
+
+	/* KDBUS_ITEM_CONN_DESCRIPTION */
+	char *conn_description;
+};
+
+/* fixed size equivalent of "kdbus_caps" */
+struct kdbus_meta_caps {
+	u32 last_cap;
+	struct {
+		u32 caps[_KERNEL_CAPABILITY_U32S];
+	} set[4];
+};
+
+/**
+ * kdbus_meta_proc_new() - Create process metadata object
+ *
+ * Return: Pointer to new object on success, ERR_PTR on failure.
+ */
+struct kdbus_meta_proc *kdbus_meta_proc_new(void)
+{
+	struct kdbus_meta_proc *mp;
+
+	mp = kzalloc(sizeof(*mp), GFP_KERNEL);
+	if (!mp)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&mp->kref);
+	mutex_init(&mp->lock);
+
+	return mp;
+}
+
+static void kdbus_meta_proc_free(struct kref *kref)
+{
+	struct kdbus_meta_proc *mp = container_of(kref, struct kdbus_meta_proc,
+						  kref);
+
+	path_put(&mp->exe_path);
+	path_put(&mp->root_path);
+	if (mp->cred)
+		put_cred(mp->cred);
+	put_pid(mp->ppid);
+	put_pid(mp->tgid);
+	put_pid(mp->pid);
+
+	kfree(mp->seclabel);
+	kfree(mp->cmdline);
+	kfree(mp->cgroup);
+	kfree(mp);
+}
+
+/**
+ * kdbus_meta_proc_ref() - Gain reference
+ * @mp:		Process metadata object
+ *
+ * Return: @mp is returned
+ */
+struct kdbus_meta_proc *kdbus_meta_proc_ref(struct kdbus_meta_proc *mp)
+{
+	if (mp)
+		kref_get(&mp->kref);
+	return mp;
+}
+
+/**
+ * kdbus_meta_proc_unref() - Drop reference
+ * @mp:		Process metadata object
+ *
+ * Return: NULL
+ */
+struct kdbus_meta_proc *kdbus_meta_proc_unref(struct kdbus_meta_proc *mp)
+{
+	if (mp)
+		kref_put(&mp->kref, kdbus_meta_proc_free);
+	return NULL;
+}
+
+static void kdbus_meta_proc_collect_pids(struct kdbus_meta_proc *mp)
+{
+	struct task_struct *parent;
+
+	mp->pid = get_pid(task_pid(current));
+	mp->tgid = get_pid(task_tgid(current));
+
+	rcu_read_lock();
+	parent = rcu_dereference(current->real_parent);
+	mp->ppid = get_pid(task_tgid(parent));
+	rcu_read_unlock();
+
+	mp->valid |= KDBUS_ATTACH_PIDS;
+}
+
+static void kdbus_meta_proc_collect_tid_comm(struct kdbus_meta_proc *mp)
+{
+	get_task_comm(mp->tid_comm, current);
+	mp->valid |= KDBUS_ATTACH_TID_COMM;
+}
+
+static void kdbus_meta_proc_collect_pid_comm(struct kdbus_meta_proc *mp)
+{
+	get_task_comm(mp->pid_comm, current->group_leader);
+	mp->valid |= KDBUS_ATTACH_PID_COMM;
+}
+
+static void kdbus_meta_proc_collect_exe(struct kdbus_meta_proc *mp)
+{
+	struct file *exe_file;
+
+	rcu_read_lock();
+	exe_file = rcu_dereference(current->mm->exe_file);
+	if (exe_file) {
+		mp->exe_path = exe_file->f_path;
+		path_get(&mp->exe_path);
+		get_fs_root(current->fs, &mp->root_path);
+		mp->valid |= KDBUS_ATTACH_EXE;
+	}
+	rcu_read_unlock();
+}
+
+static int kdbus_meta_proc_collect_cmdline(struct kdbus_meta_proc *mp)
+{
+	struct mm_struct *mm = current->mm;
+	char *cmdline;
+
+	if (!mm->arg_end)
+		return 0;
+
+	cmdline = strndup_user((const char __user *)mm->arg_start,
+			       mm->arg_end - mm->arg_start);
+	if (IS_ERR(cmdline))
+		return PTR_ERR(cmdline);
+
+	mp->cmdline = cmdline;
+	mp->valid |= KDBUS_ATTACH_CMDLINE;
+
+	return 0;
+}
+
+static int kdbus_meta_proc_collect_cgroup(struct kdbus_meta_proc *mp)
+{
+#ifdef CONFIG_CGROUPS
+	void *page;
+	char *s;
+
+	page = (void *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	s = task_cgroup_path(current, page, PAGE_SIZE);
+	if (s) {
+		mp->cgroup = kstrdup(s, GFP_KERNEL);
+		if (!mp->cgroup) {
+			free_page((unsigned long)page);
+			return -ENOMEM;
+		}
+	}
+
+	free_page((unsigned long)page);
+	mp->valid |= KDBUS_ATTACH_CGROUP;
+#endif
+
+	return 0;
+}
+
+static int kdbus_meta_proc_collect_seclabel(struct kdbus_meta_proc *mp)
+{
+#ifdef CONFIG_SECURITY
+	char *ctx = NULL;
+	u32 sid, len;
+	int ret;
+
+	security_task_getsecid(current, &sid);
+	ret = security_secid_to_secctx(sid, &ctx, &len);
+	if (ret < 0) {
+		/*
+		 * EOPNOTSUPP means no security module is active,
+		 * so let's skip adding the seclabel. This effectively
+		 * drops the SECLABEL item.
+		 */
+		return (ret == -EOPNOTSUPP) ? 0 : ret;
+	}
+
+	mp->seclabel = kstrdup(ctx, GFP_KERNEL);
+	security_release_secctx(ctx, len);
+	if (!mp->seclabel)
+		return -ENOMEM;
+
+	mp->valid |= KDBUS_ATTACH_SECLABEL;
+#endif
+
+	return 0;
+}
+
+static void kdbus_meta_proc_collect_audit(struct kdbus_meta_proc *mp)
+{
+#ifdef CONFIG_AUDITSYSCALL
+	mp->audit_loginuid = audit_get_loginuid(current);
+	mp->audit_sessionid = audit_get_sessionid(current);
+	mp->valid |= KDBUS_ATTACH_AUDIT;
+#endif
+}
+
+/**
+ * kdbus_meta_proc_collect() - Collect process metadata
+ * @mp:		Process metadata object
+ * @what:	Attach flags to collect
+ *
+ * This collects process metadata from current and saves it in @mp.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what)
+{
+	int ret;
+
+	if (!mp || !(what & (KDBUS_ATTACH_CREDS |
+			     KDBUS_ATTACH_PIDS |
+			     KDBUS_ATTACH_AUXGROUPS |
+			     KDBUS_ATTACH_TID_COMM |
+			     KDBUS_ATTACH_PID_COMM |
+			     KDBUS_ATTACH_EXE |
+			     KDBUS_ATTACH_CMDLINE |
+			     KDBUS_ATTACH_CGROUP |
+			     KDBUS_ATTACH_CAPS |
+			     KDBUS_ATTACH_SECLABEL |
+			     KDBUS_ATTACH_AUDIT)))
+		return 0;
+
+	mutex_lock(&mp->lock);
+
+	/* creds, auxgrps and caps share "struct cred" as context */
+	{
+		const u64 m_cred = KDBUS_ATTACH_CREDS |
+				   KDBUS_ATTACH_AUXGROUPS |
+				   KDBUS_ATTACH_CAPS;
+
+		if ((what & m_cred) && !(mp->collected & m_cred)) {
+			mp->cred = get_current_cred();
+			mp->valid |= m_cred;
+			mp->collected |= m_cred;
+		}
+	}
+
+	if ((what & KDBUS_ATTACH_PIDS) &&
+	    !(mp->collected & KDBUS_ATTACH_PIDS)) {
+		kdbus_meta_proc_collect_pids(mp);
+		mp->collected |= KDBUS_ATTACH_PIDS;
+	}
+
+	if ((what & KDBUS_ATTACH_TID_COMM) &&
+	    !(mp->collected & KDBUS_ATTACH_TID_COMM)) {
+		kdbus_meta_proc_collect_tid_comm(mp);
+		mp->collected |= KDBUS_ATTACH_TID_COMM;
+	}
+
+	if ((what & KDBUS_ATTACH_PID_COMM) &&
+	    !(mp->collected & KDBUS_ATTACH_PID_COMM)) {
+		kdbus_meta_proc_collect_pid_comm(mp);
+		mp->collected |= KDBUS_ATTACH_PID_COMM;
+	}
+
+	if ((what & KDBUS_ATTACH_EXE) &&
+	    !(mp->collected & KDBUS_ATTACH_EXE)) {
+		kdbus_meta_proc_collect_exe(mp);
+		mp->collected |= KDBUS_ATTACH_EXE;
+	}
+
+	if ((what & KDBUS_ATTACH_CMDLINE) &&
+	    !(mp->collected & KDBUS_ATTACH_CMDLINE)) {
+		ret = kdbus_meta_proc_collect_cmdline(mp);
+		if (ret < 0)
+			goto exit_unlock;
+		mp->collected |= KDBUS_ATTACH_CMDLINE;
+	}
+
+	if ((what & KDBUS_ATTACH_CGROUP) &&
+	    !(mp->collected & KDBUS_ATTACH_CGROUP)) {
+		ret = kdbus_meta_proc_collect_cgroup(mp);
+		if (ret < 0)
+			goto exit_unlock;
+		mp->collected |= KDBUS_ATTACH_CGROUP;
+	}
+
+	if ((what & KDBUS_ATTACH_SECLABEL) &&
+	    !(mp->collected & KDBUS_ATTACH_SECLABEL)) {
+		ret = kdbus_meta_proc_collect_seclabel(mp);
+		if (ret < 0)
+			goto exit_unlock;
+		mp->collected |= KDBUS_ATTACH_SECLABEL;
+	}
+
+	if ((what & KDBUS_ATTACH_AUDIT) &&
+	    !(mp->collected & KDBUS_ATTACH_AUDIT)) {
+		kdbus_meta_proc_collect_audit(mp);
+		mp->collected |= KDBUS_ATTACH_AUDIT;
+	}
+
+	ret = 0;
+
+exit_unlock:
+	mutex_unlock(&mp->lock);
+	return ret;
+}
+
+/**
+ * kdbus_meta_fake_new() - Create fake metadata object
+ *
+ * Return: Pointer to new object on success, ERR_PTR on failure.
+ */
+struct kdbus_meta_fake *kdbus_meta_fake_new(void)
+{
+	struct kdbus_meta_fake *mf;
+
+	mf = kzalloc(sizeof(*mf), GFP_KERNEL);
+	if (!mf)
+		return ERR_PTR(-ENOMEM);
+
+	return mf;
+}
+
+/**
+ * kdbus_meta_fake_free() - Free fake metadata object
+ * @mf:		Fake metadata object
+ *
+ * Return: NULL
+ */
+struct kdbus_meta_fake *kdbus_meta_fake_free(struct kdbus_meta_fake *mf)
+{
+	if (mf) {
+		put_pid(mf->ppid);
+		put_pid(mf->tgid);
+		put_pid(mf->pid);
+		kfree(mf->seclabel);
+		kfree(mf);
+	}
+
+	return NULL;
+}
+
+/**
+ * kdbus_meta_fake_collect() - Fill fake metadata from faked credentials
+ * @mf:		Fake metadata object
+ * @creds:	Creds to set, may be %NULL
+ * @pids:	PIDs to set, may be %NULL
+ * @seclabel:	Seclabel to set, may be %NULL
+ *
+ * This function takes information stored in @creds, @pids and @seclabel and
+ * resolves them to kernel-representations, if possible. This call uses the
+ * current task's namespaces to resolve the given information.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_meta_fake_collect(struct kdbus_meta_fake *mf,
+			    const struct kdbus_creds *creds,
+			    const struct kdbus_pids *pids,
+			    const char *seclabel)
+{
+	if (mf->valid)
+		return -EALREADY;
+
+	if (creds) {
+		struct user_namespace *ns = current_user_ns();
+
+		mf->uid		= make_kuid(ns, creds->uid);
+		mf->euid	= make_kuid(ns, creds->euid);
+		mf->suid	= make_kuid(ns, creds->suid);
+		mf->fsuid	= make_kuid(ns, creds->fsuid);
+
+		mf->gid		= make_kgid(ns, creds->gid);
+		mf->egid	= make_kgid(ns, creds->egid);
+		mf->sgid	= make_kgid(ns, creds->sgid);
+		mf->fsgid	= make_kgid(ns, creds->fsgid);
+
+		if ((creds->uid   != (uid_t)-1 && !uid_valid(mf->uid))   ||
+		    (creds->euid  != (uid_t)-1 && !uid_valid(mf->euid))  ||
+		    (creds->suid  != (uid_t)-1 && !uid_valid(mf->suid))  ||
+		    (creds->fsuid != (uid_t)-1 && !uid_valid(mf->fsuid)) ||
+		    (creds->gid   != (gid_t)-1 && !gid_valid(mf->gid))   ||
+		    (creds->egid  != (gid_t)-1 && !gid_valid(mf->egid))  ||
+		    (creds->sgid  != (gid_t)-1 && !gid_valid(mf->sgid))  ||
+		    (creds->fsgid != (gid_t)-1 && !gid_valid(mf->fsgid)))
+			return -EINVAL;
+
+		mf->valid |= KDBUS_ATTACH_CREDS;
+	}
+
+	if (pids) {
+		mf->pid = get_pid(find_vpid(pids->tid));
+		mf->tgid = get_pid(find_vpid(pids->pid));
+		mf->ppid = get_pid(find_vpid(pids->ppid));
+
+		if ((pids->tid != 0 && !mf->pid) ||
+		    (pids->pid != 0 && !mf->tgid) ||
+		    (pids->ppid != 0 && !mf->ppid)) {
+			put_pid(mf->pid);
+			put_pid(mf->tgid);
+			put_pid(mf->ppid);
+			mf->pid = NULL;
+			mf->tgid = NULL;
+			mf->ppid = NULL;
+			return -EINVAL;
+		}
+
+		mf->valid |= KDBUS_ATTACH_PIDS;
+	}
+
+	if (seclabel) {
+		mf->seclabel = kstrdup(seclabel, GFP_KERNEL);
+		if (!mf->seclabel)
+			return -ENOMEM;
+
+		mf->valid |= KDBUS_ATTACH_SECLABEL;
+	}
+
+	return 0;
+}
+
+/**
+ * kdbus_meta_conn_new() - Create connection metadata object
+ *
+ * Return: Pointer to new object on success, ERR_PTR on failure.
+ */
+struct kdbus_meta_conn *kdbus_meta_conn_new(void)
+{
+	struct kdbus_meta_conn *mc;
+
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+	if (!mc)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&mc->kref);
+	mutex_init(&mc->lock);
+
+	return mc;
+}
+
+static void kdbus_meta_conn_free(struct kref *kref)
+{
+	struct kdbus_meta_conn *mc =
+		container_of(kref, struct kdbus_meta_conn, kref);
+
+	kfree(mc->conn_description);
+	kfree(mc->owned_names_items);
+	kfree(mc);
+}
+
+/**
+ * kdbus_meta_conn_ref() - Gain reference
+ * @mc:		Connection metadata object
+ *
+ * Return: @mc is returned
+ */
+struct kdbus_meta_conn *kdbus_meta_conn_ref(struct kdbus_meta_conn *mc)
+{
+	if (mc)
+		kref_get(&mc->kref);
+	return mc;
+}
+
+/**
+ * kdbus_meta_conn_unref() - Drop reference
+ * @mc:		Connection metadata object
+ *
+ * Return: NULL
+ */
+struct kdbus_meta_conn *kdbus_meta_conn_unref(struct kdbus_meta_conn *mc)
+{
+	if (mc)
+		kref_put(&mc->kref, kdbus_meta_conn_free);
+	return NULL;
+}
+
+static void kdbus_meta_conn_collect_timestamp(struct kdbus_meta_conn *mc,
+					      u64 msg_seqnum)
+{
+	mc->ts.monotonic_ns = ktime_get_ns();
+	mc->ts.realtime_ns = ktime_get_real_ns();
+
+	if (msg_seqnum)
+		mc->ts.seqnum = msg_seqnum;
+
+	mc->valid |= KDBUS_ATTACH_TIMESTAMP;
+}
+
+static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc,
+					 struct kdbus_conn *conn)
+{
+	const struct kdbus_name_owner *owner;
+	struct kdbus_item *item;
+	size_t slen, size;
+
+	lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
+
+	size = 0;
+	/* open-code length calculation to avoid final padding */
+	list_for_each_entry(owner, &conn->names_list, conn_entry)
+		if (!(owner->flags & KDBUS_NAME_IN_QUEUE))
+			size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE +
+				sizeof(struct kdbus_name) +
+				strlen(owner->name->name) + 1;
+
+	if (!size)
+		return 0;
+
+	/* make sure we include zeroed padding for convenience helpers */
+	item = kmalloc(KDBUS_ALIGN8(size), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	mc->owned_names_items = item;
+	mc->owned_names_size = size;
+
+	list_for_each_entry(owner, &conn->names_list, conn_entry) {
+		if (owner->flags & KDBUS_NAME_IN_QUEUE)
+			continue;
+
+		slen = strlen(owner->name->name) + 1;
+		kdbus_item_set(item, KDBUS_ITEM_OWNED_NAME, NULL,
+			       sizeof(struct kdbus_name) + slen);
+		item->name.flags = owner->flags;
+		memcpy(item->name.name, owner->name->name, slen);
+		item = KDBUS_ITEM_NEXT(item);
+	}
+
+	/* sanity check: the buffer should be completely written now */
+	WARN_ON((u8 *)item !=
+			(u8 *)mc->owned_names_items + KDBUS_ALIGN8(size));
+
+	mc->valid |= KDBUS_ATTACH_NAMES;
+	return 0;
+}
+
+static int kdbus_meta_conn_collect_description(struct kdbus_meta_conn *mc,
+					       struct kdbus_conn *conn)
+{
+	if (!conn->description)
+		return 0;
+
+	mc->conn_description = kstrdup(conn->description, GFP_KERNEL);
+	if (!mc->conn_description)
+		return -ENOMEM;
+
+	mc->valid |= KDBUS_ATTACH_CONN_DESCRIPTION;
+	return 0;
+}
+
+/**
+ * kdbus_meta_conn_collect() - Collect connection metadata
+ * @mc:		Message metadata object
+ * @conn:	Connection to collect data from
+ * @msg_seqnum:	Sequence number of the message to send
+ * @what:	Attach flags to collect
+ *
+ * This collects connection metadata from @msg_seqnum and @conn and saves it
+ * in @mc.
+ *
+ * If KDBUS_ATTACH_NAMES is set in @what and @conn is non-NULL, the caller must
+ * hold the name-registry read-lock of conn->ep->bus->name_registry.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_meta_conn_collect(struct kdbus_meta_conn *mc,
+			    struct kdbus_conn *conn,
+			    u64 msg_seqnum, u64 what)
+{
+	int ret;
+
+	if (!mc || !(what & (KDBUS_ATTACH_TIMESTAMP |
+			     KDBUS_ATTACH_NAMES |
+			     KDBUS_ATTACH_CONN_DESCRIPTION)))
+		return 0;
+
+	mutex_lock(&mc->lock);
+
+	if (msg_seqnum && (what & KDBUS_ATTACH_TIMESTAMP) &&
+	    !(mc->collected & KDBUS_ATTACH_TIMESTAMP)) {
+		kdbus_meta_conn_collect_timestamp(mc, msg_seqnum);
+		mc->collected |= KDBUS_ATTACH_TIMESTAMP;
+	}
+
+	if (conn && (what & KDBUS_ATTACH_NAMES) &&
+	    !(mc->collected & KDBUS_ATTACH_NAMES)) {
+		ret = kdbus_meta_conn_collect_names(mc, conn);
+		if (ret < 0)
+			goto exit_unlock;
+		mc->collected |= KDBUS_ATTACH_NAMES;
+	}
+
+	if (conn && (what & KDBUS_ATTACH_CONN_DESCRIPTION) &&
+	    !(mc->collected & KDBUS_ATTACH_CONN_DESCRIPTION)) {
+		ret = kdbus_meta_conn_collect_description(mc, conn);
+		if (ret < 0)
+			goto exit_unlock;
+		mc->collected |= KDBUS_ATTACH_CONN_DESCRIPTION;
+	}
+
+	ret = 0;
+
+exit_unlock:
+	mutex_unlock(&mc->lock);
+	return ret;
+}
+
+static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
+				   const struct kdbus_meta_proc *mp,
+				   struct user_namespace *user_ns)
+{
+	struct user_namespace *iter;
+	const struct cred *cred = mp->cred;
+	bool parent = false, owner = false;
+	int i;
+
+	/*
+	 * This translates the effective capabilities of 'cred' into the given
+	 * user-namespace. If the given user-namespace is a child-namespace of
+	 * the user-namespace of 'cred', the mask can be copied verbatim. If
+	 * not, the mask is cleared.
+	 * There's one exception: If 'cred' is the owner of any user-namespace
+	 * in the path between the given user-namespace and the user-namespace
+	 * of 'cred', then it has all effective capabilities set. This means
+	 * the user who created a user-namespace always has all effective
+	 * capabilities in any child namespaces. Note that this is based on the
+	 * uid of the namespace creator, not the task hierarchy.
+	 */
+	for (iter = user_ns; iter; iter = iter->parent) {
+		if (iter == cred->user_ns) {
+			parent = true;
+			break;
+		}
+
+		if (iter == &init_user_ns)
+			break;
+
+		if ((iter->parent == cred->user_ns) &&
+		    uid_eq(iter->owner, cred->euid)) {
+			owner = true;
+			break;
+		}
+	}
+
+	out->last_cap = CAP_LAST_CAP;
+
+	CAP_FOR_EACH_U32(i) {
+		if (parent) {
+			out->set[0].caps[i] = cred->cap_inheritable.cap[i];
+			out->set[1].caps[i] = cred->cap_permitted.cap[i];
+			out->set[2].caps[i] = cred->cap_effective.cap[i];
+			out->set[3].caps[i] = cred->cap_bset.cap[i];
+		} else if (owner) {
+			out->set[0].caps[i] = 0U;
+			out->set[1].caps[i] = ~0U;
+			out->set[2].caps[i] = ~0U;
+			out->set[3].caps[i] = ~0U;
+		} else {
+			out->set[0].caps[i] = 0U;
+			out->set[1].caps[i] = 0U;
+			out->set[2].caps[i] = 0U;
+			out->set[3].caps[i] = 0U;
+		}
+	}
+
+	/* clear unused bits */
+	for (i = 0; i < 4; i++)
+		out->set[i].caps[CAP_TO_INDEX(CAP_LAST_CAP)] &=
+					CAP_LAST_U32_VALID_MASK;
+}
+
+/* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself */
+static uid_t kdbus_from_kuid_keep(struct user_namespace *ns, kuid_t uid)
+{
+	return uid_valid(uid) ? from_kuid_munged(ns, uid) : ((uid_t)-1);
+}
+
+/* This is equivalent to from_kgid_munged(), but maps INVALID_GID to itself */
+static gid_t kdbus_from_kgid_keep(struct user_namespace *ns, kgid_t gid)
+{
+	return gid_valid(gid) ? from_kgid_munged(ns, gid) : ((gid_t)-1);
+}
+
+struct kdbus_meta_staging {
+	const struct kdbus_meta_proc *mp;
+	const struct kdbus_meta_fake *mf;
+	const struct kdbus_meta_conn *mc;
+	const struct kdbus_conn *conn;
+	u64 mask;
+
+	void *exe;
+	const char *exe_path;
+};
+
+static size_t kdbus_meta_measure(struct kdbus_meta_staging *staging)
+{
+	const struct kdbus_meta_proc *mp = staging->mp;
+	const struct kdbus_meta_fake *mf = staging->mf;
+	const struct kdbus_meta_conn *mc = staging->mc;
+	const u64 mask = staging->mask;
+	size_t size = 0;
+
+	/* process metadata */
+
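+	/*
+	 * Faked metadata (@mf), if present, takes precedence over collected
+	 * process metadata (@mp).
+	 */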
+	if (mf && (mask & KDBUS_ATTACH_CREDS))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_creds));
+	else if (mp && (mask & KDBUS_ATTACH_CREDS))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_creds));
+
+	if (mf && (mask & KDBUS_ATTACH_PIDS))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_pids));
+	else if (mp && (mask & KDBUS_ATTACH_PIDS))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_pids));
+
+	if (mp && (mask & KDBUS_ATTACH_AUXGROUPS))
+		size += KDBUS_ITEM_SIZE(mp->cred->group_info->ngroups *
+					sizeof(u64));
+
+	if (mp && (mask & KDBUS_ATTACH_TID_COMM))
+		size += KDBUS_ITEM_SIZE(strlen(mp->tid_comm) + 1);
+
+	if (mp && (mask & KDBUS_ATTACH_PID_COMM))
+		size += KDBUS_ITEM_SIZE(strlen(mp->pid_comm) + 1);
+
+	if (staging->exe_path && (mask & KDBUS_ATTACH_EXE))
+		size += KDBUS_ITEM_SIZE(strlen(staging->exe_path) + 1);
+
+	if (mp && (mask & KDBUS_ATTACH_CMDLINE))
+		size += KDBUS_ITEM_SIZE(strlen(mp->cmdline) + 1);
+
+	if (mp && (mask & KDBUS_ATTACH_CGROUP))
+		size += KDBUS_ITEM_SIZE(strlen(mp->cgroup) + 1);
+
+	if (mp && (mask & KDBUS_ATTACH_CAPS))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_meta_caps));
+
+	if (mf && (mask & KDBUS_ATTACH_SECLABEL))
+		size += KDBUS_ITEM_SIZE(strlen(mf->seclabel) + 1);
+	else if (mp && (mask & KDBUS_ATTACH_SECLABEL))
+		size += KDBUS_ITEM_SIZE(strlen(mp->seclabel) + 1);
+
+	if (mp && (mask & KDBUS_ATTACH_AUDIT))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_audit));
+
+	/* connection metadata */
+
+	if (mc && (mask & KDBUS_ATTACH_NAMES))
+		size += KDBUS_ALIGN8(mc->owned_names_size);
+
+	if (mc && (mask & KDBUS_ATTACH_CONN_DESCRIPTION))
+		size += KDBUS_ITEM_SIZE(strlen(mc->conn_description) + 1);
+
+	if (mc && (mask & KDBUS_ATTACH_TIMESTAMP))
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_timestamp));
+
+	return size;
+}
+
+static struct kdbus_item *kdbus_write_head(struct kdbus_item **iter,
+					   u64 type, u64 size)
+{
+	struct kdbus_item *item = *iter;
+	size_t padding;
+
+	item->type = type;
+	item->size = KDBUS_ITEM_HEADER_SIZE + size;
+
+	/* clear padding */
+	padding = KDBUS_ALIGN8(item->size) - item->size;
+	if (padding)
+		memset(item->data + size, 0, padding);
+
+	*iter = KDBUS_ITEM_NEXT(item);
+	return item;
+}
+
+static struct kdbus_item *kdbus_write_full(struct kdbus_item **iter,
+					   u64 type, u64 size, const void *data)
+{
+	struct kdbus_item *item;
+
+	item = kdbus_write_head(iter, type, size);
+	memcpy(item->data, data, size);
+	return item;
+}
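+
+/*
+ * Illustration of the item layout produced by the helpers above: a 5-byte
+ * payload (for example a 4-character string plus its NUL terminator) yields
+ * item->size = KDBUS_ITEM_HEADER_SIZE + 5; kdbus_write_head() clears the 3
+ * bytes up to the next 8-byte boundary and the iterator advances by
+ * KDBUS_ALIGN8(item->size) via KDBUS_ITEM_NEXT().
+ */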
+
+static size_t kdbus_meta_write(struct kdbus_meta_staging *staging, void *mem,
+			       size_t size)
+{
+	struct user_namespace *user_ns = staging->conn->cred->user_ns;
+	struct pid_namespace *pid_ns = ns_of_pid(staging->conn->pid);
+	struct kdbus_item *item = NULL, *items = mem;
+	u8 *end, *owned_names_end = NULL;
+
+	/* process metadata */
+
+	if (staging->mf && (staging->mask & KDBUS_ATTACH_CREDS)) {
+		const struct kdbus_meta_fake *mf = staging->mf;
+
+		item = kdbus_write_head(&items, KDBUS_ITEM_CREDS,
+					sizeof(struct kdbus_creds));
+		item->creds = (struct kdbus_creds){
+			.uid	= kdbus_from_kuid_keep(user_ns, mf->uid),
+			.euid	= kdbus_from_kuid_keep(user_ns, mf->euid),
+			.suid	= kdbus_from_kuid_keep(user_ns, mf->suid),
+			.fsuid	= kdbus_from_kuid_keep(user_ns, mf->fsuid),
+			.gid	= kdbus_from_kgid_keep(user_ns, mf->gid),
+			.egid	= kdbus_from_kgid_keep(user_ns, mf->egid),
+			.sgid	= kdbus_from_kgid_keep(user_ns, mf->sgid),
+			.fsgid	= kdbus_from_kgid_keep(user_ns, mf->fsgid),
+		};
+	} else if (staging->mp && (staging->mask & KDBUS_ATTACH_CREDS)) {
+		const struct cred *c = staging->mp->cred;
+
+		item = kdbus_write_head(&items, KDBUS_ITEM_CREDS,
+					sizeof(struct kdbus_creds));
+		item->creds = (struct kdbus_creds){
+			.uid	= kdbus_from_kuid_keep(user_ns, c->uid),
+			.euid	= kdbus_from_kuid_keep(user_ns, c->euid),
+			.suid	= kdbus_from_kuid_keep(user_ns, c->suid),
+			.fsuid	= kdbus_from_kuid_keep(user_ns, c->fsuid),
+			.gid	= kdbus_from_kgid_keep(user_ns, c->gid),
+			.egid	= kdbus_from_kgid_keep(user_ns, c->egid),
+			.sgid	= kdbus_from_kgid_keep(user_ns, c->sgid),
+			.fsgid	= kdbus_from_kgid_keep(user_ns, c->fsgid),
+		};
+	}
+
+	if (staging->mf && (staging->mask & KDBUS_ATTACH_PIDS)) {
+		item = kdbus_write_head(&items, KDBUS_ITEM_PIDS,
+					sizeof(struct kdbus_pids));
+		item->pids = (struct kdbus_pids){
+			.pid = pid_nr_ns(staging->mf->tgid, pid_ns),
+			.tid = pid_nr_ns(staging->mf->pid, pid_ns),
+			.ppid = pid_nr_ns(staging->mf->ppid, pid_ns),
+		};
+	} else if (staging->mp && (staging->mask & KDBUS_ATTACH_PIDS)) {
+		item = kdbus_write_head(&items, KDBUS_ITEM_PIDS,
+					sizeof(struct kdbus_pids));
+		item->pids = (struct kdbus_pids){
+			.pid = pid_nr_ns(staging->mp->tgid, pid_ns),
+			.tid = pid_nr_ns(staging->mp->pid, pid_ns),
+			.ppid = pid_nr_ns(staging->mp->ppid, pid_ns),
+		};
+	}
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_AUXGROUPS)) {
+		const struct group_info *info = staging->mp->cred->group_info;
+		size_t i;
+
+		item = kdbus_write_head(&items, KDBUS_ITEM_AUXGROUPS,
+					info->ngroups * sizeof(u64));
+		for (i = 0; i < info->ngroups; ++i)
+			item->data64[i] = from_kgid_munged(user_ns,
+							   GROUP_AT(info, i));
+	}
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_TID_COMM))
+		item = kdbus_write_full(&items, KDBUS_ITEM_TID_COMM,
+					strlen(staging->mp->tid_comm) + 1,
+					staging->mp->tid_comm);
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_PID_COMM))
+		item = kdbus_write_full(&items, KDBUS_ITEM_PID_COMM,
+					strlen(staging->mp->pid_comm) + 1,
+					staging->mp->pid_comm);
+
+	if (staging->exe_path && (staging->mask & KDBUS_ATTACH_EXE))
+		item = kdbus_write_full(&items, KDBUS_ITEM_EXE,
+					strlen(staging->exe_path) + 1,
+					staging->exe_path);
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_CMDLINE))
+		item = kdbus_write_full(&items, KDBUS_ITEM_CMDLINE,
+					strlen(staging->mp->cmdline) + 1,
+					staging->mp->cmdline);
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_CGROUP))
+		item = kdbus_write_full(&items, KDBUS_ITEM_CGROUP,
+					strlen(staging->mp->cgroup) + 1,
+					staging->mp->cgroup);
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_CAPS)) {
+		item = kdbus_write_head(&items, KDBUS_ITEM_CAPS,
+					sizeof(struct kdbus_meta_caps));
+		kdbus_meta_export_caps((void*)&item->caps, staging->mp,
+				       user_ns);
+	}
+
+	if (staging->mf && (staging->mask & KDBUS_ATTACH_SECLABEL))
+		item = kdbus_write_full(&items, KDBUS_ITEM_SECLABEL,
+					strlen(staging->mf->seclabel) + 1,
+					staging->mf->seclabel);
+	else if (staging->mp && (staging->mask & KDBUS_ATTACH_SECLABEL))
+		item = kdbus_write_full(&items, KDBUS_ITEM_SECLABEL,
+					strlen(staging->mp->seclabel) + 1,
+					staging->mp->seclabel);
+
+	if (staging->mp && (staging->mask & KDBUS_ATTACH_AUDIT)) {
+		item = kdbus_write_head(&items, KDBUS_ITEM_AUDIT,
+					sizeof(struct kdbus_audit));
+		item->audit = (struct kdbus_audit){
+			.loginuid = from_kuid(user_ns,
+					      staging->mp->audit_loginuid),
+			.sessionid = staging->mp->audit_sessionid,
+		};
+	}
+
+	/* connection metadata */
+
+	if (staging->mc && (staging->mask & KDBUS_ATTACH_NAMES)) {
+		memcpy(items, staging->mc->owned_names_items,
+		       KDBUS_ALIGN8(staging->mc->owned_names_size));
+		owned_names_end = (u8 *)items + staging->mc->owned_names_size;
+		items = (void *)KDBUS_ALIGN8((unsigned long)owned_names_end);
+	}
+
+	if (staging->mc && (staging->mask & KDBUS_ATTACH_CONN_DESCRIPTION))
+		item = kdbus_write_full(&items, KDBUS_ITEM_CONN_DESCRIPTION,
+				strlen(staging->mc->conn_description) + 1,
+				staging->mc->conn_description);
+
+	if (staging->mc && (staging->mask & KDBUS_ATTACH_TIMESTAMP))
+		item = kdbus_write_full(&items, KDBUS_ITEM_TIMESTAMP,
+					sizeof(staging->mc->ts),
+					&staging->mc->ts);
+
+	/*
+	 * Return real size (minus trailing padding). In case of 'owned_names'
+	 * we cannot deduce it from item->size, so treat it specially.
+	 */
+
+	if (items == (void *)KDBUS_ALIGN8((unsigned long)owned_names_end))
+		end = owned_names_end;
+	else if (item)
+		end = (u8 *)item + item->size;
+	else
+		end = mem;
+
+	WARN_ON((u8 *)items - (u8 *)mem != size);
+	WARN_ON((void *)KDBUS_ALIGN8((unsigned long)end) != (void *)items);
+
+	return end - (u8 *)mem;
+}
+
+int kdbus_meta_emit(struct kdbus_meta_proc *mp,
+		    struct kdbus_meta_fake *mf,
+		    struct kdbus_meta_conn *mc,
+		    struct kdbus_conn *conn,
+		    u64 mask,
+		    struct kdbus_item **out_items,
+		    size_t *out_size)
+{
+	struct kdbus_meta_staging staging = {};
+	struct kdbus_item *items = NULL;
+	size_t size = 0;
+	int ret;
+
+	if (WARN_ON(mf && mp))
+		mp = NULL;
+
+	staging.mp = mp;
+	staging.mf = mf;
+	staging.mc = mc;
+	staging.conn = conn;
+
+	/* get mask of valid items */
+	if (mf)
+		staging.mask |= mf->valid;
+	if (mp) {
+		mutex_lock(&mp->lock);
+		staging.mask |= mp->valid;
+		mutex_unlock(&mp->lock);
+	}
+	if (mc) {
+		mutex_lock(&mc->lock);
+		staging.mask |= mc->valid;
+		mutex_unlock(&mc->lock);
+	}
+
+	staging.mask &= mask;
+
+	if (!staging.mask) { /* bail out if nothing to do */
+		ret = 0;
+		goto exit;
+	}
+
+	/* EXE is special as it needs a temporary page to assemble */
+	if (mp && (staging.mask & KDBUS_ATTACH_EXE)) {
+		struct path p;
+
+		/*
+		 * XXX: We need access to __d_path() so we can write the path
+		 * relative to conn->root_path. Once upstream, we need
+		 * EXPORT_SYMBOL(__d_path) or an equivalent of d_path() that
+		 * takes the root path directly. Until then, we drop this item
+		 * if the root-paths differ.
+		 */
+
+		get_fs_root(current->fs, &p);
+		if (path_equal(&p, &conn->root_path)) {
+			staging.exe = (void *)__get_free_page(GFP_TEMPORARY);
+			if (!staging.exe) {
+				path_put(&p);
+				ret = -ENOMEM;
+				goto exit;
+			}
+
+			staging.exe_path = d_path(&mp->exe_path, staging.exe,
+						  PAGE_SIZE);
+			if (IS_ERR(staging.exe_path)) {
+				path_put(&p);
+				ret = PTR_ERR(staging.exe_path);
+				goto exit;
+			}
+		}
+		path_put(&p);
+	}
+
+	size = kdbus_meta_measure(&staging);
+	if (!size) { /* bail out if nothing to do */
+		ret = 0;
+		goto exit;
+	}
+
+	items = kmalloc(size, GFP_KERNEL);
+	if (!items) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	size = kdbus_meta_write(&staging, items, size);
+	if (!size) {
+		kfree(items);
+		items = NULL;
+	}
+
+	ret = 0;
+
+exit:
+	if (staging.exe)
+		free_page((unsigned long)staging.exe);
+	if (ret >= 0) {
+		*out_items = items;
+		*out_size = size;
+	}
+	return ret;
+}
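+
+/*
+ * Usage sketch for kdbus_meta_emit() (hypothetical caller; 'mp', 'mc', 'snd'
+ * and 'rcv' stand for previously collected metadata objects and the two
+ * connections of a message transaction):
+ *
+ *	struct kdbus_item *items = NULL;
+ *	size_t size = 0;
+ *	u64 mask = kdbus_meta_msg_mask(snd, rcv);
+ *	int ret = kdbus_meta_emit(mp, NULL, mc, rcv, mask, &items, &size);
+ *
+ *	if (ret >= 0 && items) {
+ *		... copy 'size' bytes of 'items' into the receiver's pool ...
+ *		kfree(items);
+ *	}
+ */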
+
+enum {
+	KDBUS_META_PROC_NONE,
+	KDBUS_META_PROC_NORMAL,
+};
+
+/**
+ * kdbus_proc_permission() - check /proc permissions on target pid
+ * @pid_ns:		namespace we operate in
+ * @cred:		credentials of requestor
+ * @target:		target process
+ *
+ * This checks whether a process with credentials @cred can access information
+ * of @target in the namespace @pid_ns. This tries to follow /proc permissions,
+ * but is slightly more restrictive.
+ *
+ * Return: The /proc access level (KDBUS_META_PROC_*).
+ */
+static unsigned int kdbus_proc_permission(const struct pid_namespace *pid_ns,
+					  const struct cred *cred,
+					  struct pid *target)
+{
+	if (pid_ns->hide_pid < 1)
+		return KDBUS_META_PROC_NORMAL;
+
+	/* XXX: we need groups_search() exported for aux-groups */
+	if (gid_eq(cred->egid, pid_ns->pid_gid))
+		return KDBUS_META_PROC_NORMAL;
+
+	/*
+	 * XXX: If ptrace_may_access(PTRACE_MODE_READ) is granted, you can
+	 * override hide_pid. However, ptrace_may_access() only supports
+	 * checking 'current', so we cannot use it here. We simply decide
+	 * not to support this override, so there is no need to worry.
+	 */
+
+	return KDBUS_META_PROC_NONE;
+}
+
+/**
+ * kdbus_meta_proc_mask() - calculate which metadata would be visible to
+ *			    a connection via /proc
+ * @prv_pid:		pid of metadata provider
+ * @req_pid:		pid of metadata requestor
+ * @req_cred:		credentials of metadata requestor
+ * @wanted:		metadata that is requested
+ *
+ * This checks which metadata items of @prv_pid can be read via /proc by the
+ * requestor @req_pid.
+ *
+ * Return: Set of metadata flags the requestor can see (limited by @wanted).
+ */
+static u64 kdbus_meta_proc_mask(struct pid *prv_pid,
+				struct pid *req_pid,
+				const struct cred *req_cred,
+				u64 wanted)
+{
+	struct pid_namespace *prv_ns, *req_ns;
+	unsigned int proc;
+
+	prv_ns = ns_of_pid(prv_pid);
+	req_ns = ns_of_pid(req_pid);
+
+	/*
+	 * If the sender is not visible in the receiver namespace, then the
+	 * receiver cannot access the sender via its own procfs. Hence, we do
+	 * not attach any additional metadata.
+	 */
+	if (!pid_nr_ns(prv_pid, req_ns))
+		return 0;
+
+	/*
+	 * If the pid-namespace of the receiver has hide_pid set, it cannot see
+	 * any process but its own. We shortcut this /proc permission check if
+	 * provider and requestor are the same. If not, we perform rather
+	 * expensive /proc permission checks.
+	 */
+	if (prv_pid == req_pid)
+		proc = KDBUS_META_PROC_NORMAL;
+	else
+		proc = kdbus_proc_permission(req_ns, req_cred, prv_pid);
+
+	/* you need /proc access to read standard process attributes */
+	if (proc < KDBUS_META_PROC_NORMAL)
+		wanted &= ~(KDBUS_ATTACH_TID_COMM |
+			    KDBUS_ATTACH_PID_COMM |
+			    KDBUS_ATTACH_SECLABEL |
+			    KDBUS_ATTACH_CMDLINE |
+			    KDBUS_ATTACH_CGROUP |
+			    KDBUS_ATTACH_AUDIT |
+			    KDBUS_ATTACH_CAPS |
+			    KDBUS_ATTACH_EXE);
+
+	/* clear all non-/proc flags */
+	return wanted & (KDBUS_ATTACH_TID_COMM |
+			 KDBUS_ATTACH_PID_COMM |
+			 KDBUS_ATTACH_SECLABEL |
+			 KDBUS_ATTACH_CMDLINE |
+			 KDBUS_ATTACH_CGROUP |
+			 KDBUS_ATTACH_AUDIT |
+			 KDBUS_ATTACH_CAPS |
+			 KDBUS_ATTACH_EXE);
+}
+
+/**
+ * kdbus_meta_get_mask() - calculate attach flags mask for metadata request
+ * @prv_pid:		pid of metadata provider
+ * @prv_mask:		mask of metadata the provider grants unchecked
+ * @req_pid:		pid of metadata requestor
+ * @req_cred:		credentials of metadata requestor
+ * @req_mask:		mask of metadata that is requested
+ *
+ * This calculates the metadata items that the requestor @req_pid can access
+ * from the metadata provider @prv_pid. This permission check consists of
+ * several different parts:
+ *  - Providers can grant metadata items unchecked. Regardless of their type,
+ *    they're always granted to the requestor. This mask is passed as @prv_mask.
+ *  - Basic items (credentials and connection metadata) are granted implicitly
+ *    to everyone. They're publicly available to any bus-user that can see the
+ *    provider.
+ *  - Process credentials that are not granted implicitly follow the same
+ *    permission checks as /proc. That is, we always assume a requestor
+ *    process has access to its *own* /proc mount, provided it has access to
+ *    kdbusfs.
+ *
+ * Return: Mask of metadata that is granted.
+ */
+static u64 kdbus_meta_get_mask(struct pid *prv_pid, u64 prv_mask,
+			       struct pid *req_pid,
+			       const struct cred *req_cred, u64 req_mask)
+{
+	u64 missing, impl_mask, proc_mask = 0;
+
+	/*
+	 * Connection metadata and basic unix process credentials are
+	 * transmitted implicitly, and cannot be suppressed. Both are required
+	 * to perform user-space policies on the receiver-side. Furthermore,
+	 * connection metadata is public state, anyway, and unix credentials
+	 * are needed for UDS-compatibility. We extend them slightly by
+	 * auxiliary groups and additional uids/gids/pids.
+	 */
+	impl_mask = /* connection metadata */
+		    KDBUS_ATTACH_CONN_DESCRIPTION |
+		    KDBUS_ATTACH_TIMESTAMP |
+		    KDBUS_ATTACH_NAMES |
+		    /* credentials and pids */
+		    KDBUS_ATTACH_AUXGROUPS |
+		    KDBUS_ATTACH_CREDS |
+		    KDBUS_ATTACH_PIDS;
+
+	/*
+	 * Calculate the set of metadata that is not granted implicitly nor by
+	 * the sender, but still requested by the receiver. If any are left,
+	 * perform rather expensive /proc access checks for them.
+	 */
+	missing = req_mask & ~((prv_mask | impl_mask) & req_mask);
+	if (missing)
+		proc_mask = kdbus_meta_proc_mask(prv_pid, req_pid, req_cred,
+						 missing);
+
+	return (prv_mask | impl_mask | proc_mask) & req_mask;
+}
+
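+/*
+ * Worked example for kdbus_meta_get_mask() (hypothetical masks): if the
+ * provider grants only KDBUS_ATTACH_EXE unchecked and the requestor asks for
+ * KDBUS_ATTACH_CREDS | KDBUS_ATTACH_EXE | KDBUS_ATTACH_CGROUP, then CREDS is
+ * granted implicitly, EXE is granted by the provider, and only CGROUP remains
+ * in 'missing' and is subject to the /proc permission check.
+ */
+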
+/**
+ * kdbus_meta_info_mask() - calculate metadata mask for a connection query
+ * @conn:	connection that provides the metadata
+ * @mask:	metadata mask requested by the querying task
+ *
+ * Calculate which metadata items of @conn the current task may see, based on
+ * the items @conn grants via its send attach-flags and the /proc permission
+ * checks above.
+ *
+ * Return: Mask of metadata items that may be reported (limited by @mask).
+ */
+u64 kdbus_meta_info_mask(const struct kdbus_conn *conn, u64 mask)
+{
+	return kdbus_meta_get_mask(conn->pid,
+				   atomic64_read(&conn->attach_flags_send),
+				   task_pid(current),
+				   current_cred(),
+				   mask);
+}
+
+/**
+ * kdbus_meta_msg_mask() - calculate metadata mask for a message transaction
+ * @snd:	sending connection (the current task must be the sender)
+ * @rcv:	receiving connection
+ *
+ * Calculate which metadata items of the sender may be attached to a message
+ * for @rcv, based on the sender's send attach-flags, the receiver's receive
+ * attach-flags and the /proc permission checks above.
+ *
+ * Return: Mask of metadata items that may be attached to the message.
+ */
+u64 kdbus_meta_msg_mask(const struct kdbus_conn *snd,
+			const struct kdbus_conn *rcv)
+{
+	return kdbus_meta_get_mask(task_pid(current),
+				   atomic64_read(&snd->attach_flags_send),
+				   rcv->pid,
+				   rcv->cred,
+				   atomic64_read(&rcv->attach_flags_recv));
+}
diff -Nur linux-4.2/ipc/kdbus/metadata.h linux-4.2-pck/ipc/kdbus/metadata.h
--- linux-4.2/ipc/kdbus/metadata.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/metadata.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_METADATA_H
+#define __KDBUS_METADATA_H
+
+#include <linux/kernel.h>
+
+struct kdbus_conn;
+struct kdbus_pool_slice;
+
+struct kdbus_meta_proc;
+struct kdbus_meta_conn;
+
+/**
+ * struct kdbus_meta_fake - Fake metadata
+ * @valid:		Bitmask of collected and valid items
+ * @uid:		UID of process
+ * @euid:		EUID of process
+ * @suid:		SUID of process
+ * @fsuid:		FSUID of process
+ * @gid:		GID of process
+ * @egid:		EGID of process
+ * @sgid:		SGID of process
+ * @fsgid:		FSGID of process
+ * @pid:		PID of process
+ * @tgid:		TGID of process
+ * @ppid:		PPID of process
+ * @seclabel:		Seclabel
+ */
+struct kdbus_meta_fake {
+	u64 valid;
+
+	/* KDBUS_ITEM_CREDS */
+	kuid_t uid, euid, suid, fsuid;
+	kgid_t gid, egid, sgid, fsgid;
+
+	/* KDBUS_ITEM_PIDS */
+	struct pid *pid, *tgid, *ppid;
+
+	/* KDBUS_ITEM_SECLABEL */
+	char *seclabel;
+};
+
+struct kdbus_meta_proc *kdbus_meta_proc_new(void);
+struct kdbus_meta_proc *kdbus_meta_proc_ref(struct kdbus_meta_proc *mp);
+struct kdbus_meta_proc *kdbus_meta_proc_unref(struct kdbus_meta_proc *mp);
+int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what);
+
+struct kdbus_meta_fake *kdbus_meta_fake_new(void);
+struct kdbus_meta_fake *kdbus_meta_fake_free(struct kdbus_meta_fake *mf);
+int kdbus_meta_fake_collect(struct kdbus_meta_fake *mf,
+			    const struct kdbus_creds *creds,
+			    const struct kdbus_pids *pids,
+			    const char *seclabel);
+
+struct kdbus_meta_conn *kdbus_meta_conn_new(void);
+struct kdbus_meta_conn *kdbus_meta_conn_ref(struct kdbus_meta_conn *mc);
+struct kdbus_meta_conn *kdbus_meta_conn_unref(struct kdbus_meta_conn *mc);
+int kdbus_meta_conn_collect(struct kdbus_meta_conn *mc,
+			    struct kdbus_conn *conn,
+			    u64 msg_seqnum, u64 what);
+
+int kdbus_meta_emit(struct kdbus_meta_proc *mp,
+		    struct kdbus_meta_fake *mf,
+		    struct kdbus_meta_conn *mc,
+		    struct kdbus_conn *conn,
+		    u64 mask,
+		    struct kdbus_item **out_items,
+		    size_t *out_size);
+u64 kdbus_meta_info_mask(const struct kdbus_conn *conn, u64 mask);
+u64 kdbus_meta_msg_mask(const struct kdbus_conn *snd,
+			const struct kdbus_conn *rcv);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/names.c linux-4.2-pck/ipc/kdbus/names.c
--- linux-4.2/ipc/kdbus/names.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/names.c	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,854 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "handle.h"
+#include "item.h"
+#include "names.h"
+#include "notify.h"
+#include "policy.h"
+
+#define KDBUS_NAME_SAVED_MASK (KDBUS_NAME_ALLOW_REPLACEMENT |	\
+			       KDBUS_NAME_QUEUE)
+
+static bool kdbus_name_owner_is_used(struct kdbus_name_owner *owner)
+{
+	return !list_empty(&owner->name_entry) ||
+	       owner == owner->name->activator;
+}
+
+static struct kdbus_name_owner *
+kdbus_name_owner_new(struct kdbus_conn *conn, struct kdbus_name_entry *name,
+		     u64 flags)
+{
+	struct kdbus_name_owner *owner;
+
+	kdbus_conn_assert_active(conn);
+
+	if (conn->name_count >= KDBUS_CONN_MAX_NAMES)
+		return ERR_PTR(-E2BIG);
+
+	owner = kmalloc(sizeof(*owner), GFP_KERNEL);
+	if (!owner)
+		return ERR_PTR(-ENOMEM);
+
+	owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+	owner->conn = conn;
+	owner->name = name;
+	list_add_tail(&owner->conn_entry, &conn->names_list);
+	INIT_LIST_HEAD(&owner->name_entry);
+
+	++conn->name_count;
+	return owner;
+}
+
+static void kdbus_name_owner_free(struct kdbus_name_owner *owner)
+{
+	if (!owner)
+		return;
+
+	WARN_ON(kdbus_name_owner_is_used(owner));
+	--owner->conn->name_count;
+	list_del(&owner->conn_entry);
+	kfree(owner);
+}
+
+static struct kdbus_name_owner *
+kdbus_name_owner_find(struct kdbus_name_entry *name, struct kdbus_conn *conn)
+{
+	struct kdbus_name_owner *owner;
+
+	/*
+	 * Use conn->names_list over name->queue to make sure boundaries of
+	 * this linear search are controlled by the connection itself.
+	 * Furthermore, this will find normal owners as well as activators
+	 * without any additional code.
+	 */
+	list_for_each_entry(owner, &conn->names_list, conn_entry)
+		if (owner->name == name)
+			return owner;
+
+	return NULL;
+}
+
+static bool kdbus_name_entry_is_used(struct kdbus_name_entry *name)
+{
+	return !list_empty(&name->queue) || name->activator;
+}
+
+static struct kdbus_name_owner *
+kdbus_name_entry_first(struct kdbus_name_entry *name)
+{
+	return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+					name_entry);
+}
+
+static struct kdbus_name_entry *
+kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash,
+		     const char *name_str)
+{
+	struct kdbus_name_entry *name;
+	size_t namelen;
+
+	lockdep_assert_held(&r->rwlock);
+
+	namelen = strlen(name_str);
+
+	name = kmalloc(sizeof(*name) + namelen + 1, GFP_KERNEL);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	name->name_id = ++r->name_seq_last;
+	name->activator = NULL;
+	INIT_LIST_HEAD(&name->queue);
+	hash_add(r->entries_hash, &name->hentry, hash);
+	memcpy(name->name, name_str, namelen + 1);
+
+	return name;
+}
+
+static void kdbus_name_entry_free(struct kdbus_name_entry *name)
+{
+	if (!name)
+		return;
+
+	WARN_ON(kdbus_name_entry_is_used(name));
+	hash_del(&name->hentry);
+	kfree(name);
+}
+
+static struct kdbus_name_entry *
+kdbus_name_entry_find(struct kdbus_name_registry *r, u32 hash,
+		      const char *name_str)
+{
+	struct kdbus_name_entry *name;
+
+	lockdep_assert_held(&r->rwlock);
+
+	hash_for_each_possible(r->entries_hash, name, hentry, hash)
+		if (!strcmp(name->name, name_str))
+			return name;
+
+	return NULL;
+}
+
+/**
+ * kdbus_name_registry_new() - create a new name registry
+ *
+ * Return: a new kdbus_name_registry on success, ERR_PTR on failure.
+ */
+struct kdbus_name_registry *kdbus_name_registry_new(void)
+{
+	struct kdbus_name_registry *r;
+
+	r = kmalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return ERR_PTR(-ENOMEM);
+
+	hash_init(r->entries_hash);
+	init_rwsem(&r->rwlock);
+	r->name_seq_last = 0;
+
+	return r;
+}
+
+/**
+ * kdbus_name_registry_free() - free name registry
+ * @r:		name registry to free, or NULL
+ *
+ * Free a name registry and clean up all internal objects. This is a no-op if
+ * you pass NULL as registry.
+ */
+void kdbus_name_registry_free(struct kdbus_name_registry *r)
+{
+	if (!r)
+		return;
+
+	WARN_ON(!hash_empty(r->entries_hash));
+	kfree(r);
+}
+
+/**
+ * kdbus_name_lookup_unlocked() - lookup name in registry
+ * @reg:		name registry
+ * @name:		name to lookup
+ *
+ * This looks up @name in the given name-registry and returns the
+ * kdbus_name_entry object. The caller must hold the registry-lock and must not
+ * access the returned object after releasing the lock.
+ *
+ * Return: Pointer to name-entry, or NULL if not found.
+ */
+struct kdbus_name_entry *
+kdbus_name_lookup_unlocked(struct kdbus_name_registry *reg, const char *name)
+{
+	return kdbus_name_entry_find(reg, kdbus_strhash(name), name);
+}
+
+static int kdbus_name_become_activator(struct kdbus_name_owner *owner,
+				       u64 *return_flags)
+{
+	if (kdbus_name_owner_is_used(owner))
+		return -EALREADY;
+	if (owner->name->activator)
+		return -EEXIST;
+
+	owner->name->activator = owner;
+	owner->flags |= KDBUS_NAME_ACTIVATOR;
+
+	if (kdbus_name_entry_first(owner->name)) {
+		owner->flags |= KDBUS_NAME_IN_QUEUE;
+	} else {
+		owner->flags |= KDBUS_NAME_PRIMARY;
+		kdbus_notify_name_change(owner->conn->ep->bus,
+					 KDBUS_ITEM_NAME_ADD,
+					 0, owner->conn->id,
+					 0, owner->flags,
+					 owner->name->name);
+	}
+
+	if (return_flags)
+		*return_flags = owner->flags | KDBUS_NAME_ACQUIRED;
+
+	return 0;
+}
+
+static int kdbus_name_update(struct kdbus_name_owner *owner, u64 flags,
+			     u64 *return_flags)
+{
+	struct kdbus_name_owner *primary, *activator;
+	struct kdbus_name_entry *name;
+	struct kdbus_bus *bus;
+	u64 nflags = 0;
+	int ret = 0;
+
+	name = owner->name;
+	bus = owner->conn->ep->bus;
+	primary = kdbus_name_entry_first(name);
+	activator = name->activator;
+
+	/* cannot be activator and acquire a name */
+	if (owner == activator)
+		return -EUCLEAN;
+
+	/* update saved flags */
+	owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+
+	if (!primary) {
+		/*
+		 * No primary owner (but maybe an activator). Take over the
+		 * name.
+		 */
+
+		list_add(&owner->name_entry, &name->queue);
+		owner->flags |= KDBUS_NAME_PRIMARY;
+		nflags |= KDBUS_NAME_ACQUIRED;
+
+		/* move messages to new owner on activation */
+		if (activator) {
+			kdbus_conn_move_messages(owner->conn, activator->conn,
+						 name->name_id);
+			kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+					activator->conn->id, owner->conn->id,
+					activator->flags, owner->flags,
+					name->name);
+			activator->flags &= ~KDBUS_NAME_PRIMARY;
+			activator->flags |= KDBUS_NAME_IN_QUEUE;
+		} else {
+			kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_ADD,
+						 0, owner->conn->id,
+						 0, owner->flags,
+						 name->name);
+		}
+
+	} else if (owner == primary) {
+		/*
+		 * Already the primary owner of the name, flags were already
+		 * updated. Nothing to do.
+		 */
+
+		owner->flags |= KDBUS_NAME_PRIMARY;
+
+	} else if ((primary->flags & KDBUS_NAME_ALLOW_REPLACEMENT) &&
+		   (flags & KDBUS_NAME_REPLACE_EXISTING)) {
+		/*
+		 * We're not the primary owner but can replace it. Move us
+		 * ahead of the primary owner and acquire the name (possibly
+		 * skipping queued owners ahead of us).
+		 */
+
+		list_del_init(&owner->name_entry);
+		list_add(&owner->name_entry, &name->queue);
+		owner->flags |= KDBUS_NAME_PRIMARY;
+		nflags |= KDBUS_NAME_ACQUIRED;
+
+		kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+					 primary->conn->id, owner->conn->id,
+					 primary->flags, owner->flags,
+					 name->name);
+
+		/* requeue old primary, or drop if queueing not wanted */
+		if (primary->flags & KDBUS_NAME_QUEUE) {
+			primary->flags &= ~KDBUS_NAME_PRIMARY;
+			primary->flags |= KDBUS_NAME_IN_QUEUE;
+		} else {
+			list_del_init(&primary->name_entry);
+			kdbus_name_owner_free(primary);
+		}
+
+	} else if (flags & KDBUS_NAME_QUEUE) {
+		/*
+		 * Name is already occupied and we cannot take it over, but
+		 * queuing is allowed. Put us silently on the queue, if not
+		 * already there.
+		 */
+
+		owner->flags |= KDBUS_NAME_IN_QUEUE;
+		if (!kdbus_name_owner_is_used(owner)) {
+			list_add_tail(&owner->name_entry, &name->queue);
+			nflags |= KDBUS_NAME_ACQUIRED;
+		}
+	} else if (kdbus_name_owner_is_used(owner)) {
+		/*
+		 * Already queued on name, but re-queueing was not requested.
+		 * Make sure to unlink it from the name; the caller is
+		 * responsible for releasing it.
+		 */
+
+		list_del_init(&owner->name_entry);
+	} else {
+		/*
+		 * Name is already claimed and queueing is not requested.
+		 * Return error to the caller.
+		 */
+
+		ret = -EEXIST;
+	}
+
+	if (return_flags)
+		*return_flags = owner->flags | nflags;
+
+	return ret;
+}
+
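+/**
+ * kdbus_name_acquire() - acquire a well-known name
+ * @reg:		name registry
+ * @conn:		connection to acquire the name for
+ * @name_str:		name to acquire
+ * @flags:		KDBUS_NAME_* flags requested by the caller
+ * @return_flags:	storage for KDBUS_NAME_* flags of the acquired name,
+ *			or NULL
+ *
+ * Return: 0 on success, negative error code on failure.
+ */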
+int kdbus_name_acquire(struct kdbus_name_registry *reg,
+		       struct kdbus_conn *conn, const char *name_str,
+		       u64 flags, u64 *return_flags)
+{
+	struct kdbus_name_entry *name = NULL;
+	struct kdbus_name_owner *owner = NULL;
+	u32 hash;
+	int ret;
+
+	kdbus_conn_assert_active(conn);
+
+	down_write(&reg->rwlock);
+
+	/*
+	 * Verify the connection has access to the name. Do this before testing
+	 * for double-acquisitions and other errors to make sure we do not leak
+	 * information about this name through possible custom endpoints.
+	 */
+	if (!kdbus_conn_policy_own_name(conn, current_cred(), name_str)) {
+		ret = -EPERM;
+		goto exit;
+	}
+
+	/*
+	 * Lookup the name entry. If it already exists, search for an owner
+	 * entry as we might already own that name. If either does not exist,
+	 * we will allocate a fresh one.
+	 */
+	hash = kdbus_strhash(name_str);
+	name = kdbus_name_entry_find(reg, hash, name_str);
+	if (name) {
+		owner = kdbus_name_owner_find(name, conn);
+	} else {
+		name = kdbus_name_entry_new(reg, hash, name_str);
+		if (IS_ERR(name)) {
+			ret = PTR_ERR(name);
+			name = NULL;
+			goto exit;
+		}
+	}
+
+	/* create name owner object if not already queued */
+	if (!owner) {
+		owner = kdbus_name_owner_new(conn, name, flags);
+		if (IS_ERR(owner)) {
+			ret = PTR_ERR(owner);
+			owner = NULL;
+			goto exit;
+		}
+	}
+
+	if (flags & KDBUS_NAME_ACTIVATOR)
+		ret = kdbus_name_become_activator(owner, return_flags);
+	else
+		ret = kdbus_name_update(owner, flags, return_flags);
+	if (ret < 0)
+		goto exit;
+
+exit:
+	if (owner && !kdbus_name_owner_is_used(owner))
+		kdbus_name_owner_free(owner);
+	if (name && !kdbus_name_entry_is_used(name))
+		kdbus_name_entry_free(name);
+	up_write(&reg->rwlock);
+	kdbus_notify_flush(conn->ep->bus);
+	return ret;
+}
+
+static void kdbus_name_release_unlocked(struct kdbus_name_owner *owner)
+{
+	struct kdbus_name_owner *primary, *next;
+	struct kdbus_name_entry *name;
+
+	name = owner->name;
+	primary = kdbus_name_entry_first(name);
+
+	list_del_init(&owner->name_entry);
+	if (owner == name->activator)
+		name->activator = NULL;
+
+	if (!primary || owner == primary) {
+		next = kdbus_name_entry_first(name);
+		if (!next)
+			next = name->activator;
+
+		if (next) {
+			/* hand to next in queue */
+			next->flags &= ~KDBUS_NAME_IN_QUEUE;
+			next->flags |= KDBUS_NAME_PRIMARY;
+			if (next == name->activator)
+				kdbus_conn_move_messages(next->conn,
+							 owner->conn,
+							 name->name_id);
+
+			kdbus_notify_name_change(owner->conn->ep->bus,
+					KDBUS_ITEM_NAME_CHANGE,
+					owner->conn->id, next->conn->id,
+					owner->flags, next->flags,
+					name->name);
+		} else {
+			kdbus_notify_name_change(owner->conn->ep->bus,
+						 KDBUS_ITEM_NAME_REMOVE,
+						 owner->conn->id, 0,
+						 owner->flags, 0,
+						 name->name);
+		}
+	}
+
+	kdbus_name_owner_free(owner);
+	if (!kdbus_name_entry_is_used(name))
+		kdbus_name_entry_free(name);
+}
+
+static int kdbus_name_release(struct kdbus_name_registry *reg,
+			      struct kdbus_conn *conn,
+			      const char *name_str)
+{
+	struct kdbus_name_owner *owner;
+	struct kdbus_name_entry *name;
+	int ret = 0;
+
+	down_write(&reg->rwlock);
+	name = kdbus_name_entry_find(reg, kdbus_strhash(name_str), name_str);
+	if (name) {
+		owner = kdbus_name_owner_find(name, conn);
+		if (owner)
+			kdbus_name_release_unlocked(owner);
+		else
+			ret = -EADDRINUSE;
+	} else {
+		ret = -ESRCH;
+	}
+	up_write(&reg->rwlock);
+
+	kdbus_notify_flush(conn->ep->bus);
+	return ret;
+}
+
+/**
+ * kdbus_name_release_all() - remove all name entries of a given connection
+ * @reg:		name registry
+ * @conn:		connection
+ */
+void kdbus_name_release_all(struct kdbus_name_registry *reg,
+			    struct kdbus_conn *conn)
+{
+	struct kdbus_name_owner *owner;
+
+	down_write(&reg->rwlock);
+
+	while ((owner = list_first_entry_or_null(&conn->names_list,
+						 struct kdbus_name_owner,
+						 conn_entry)))
+		kdbus_name_release_unlocked(owner);
+
+	up_write(&reg->rwlock);
+
+	kdbus_notify_flush(conn->ep->bus);
+}
+
+/**
+ * kdbus_name_is_valid() - check if a name is valid
+ * @p:			The name to check
+ * @allow_wildcard:	Whether or not to allow a wildcard name
+ *
+ * A name is valid if all of the following criteria are met:
+ *
+ *  - The name has two or more elements separated by a period ('.') character.
+ *  - All elements must contain at least one character.
+ *  - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-"
+ *    and must not begin with a digit.
+ *  - The name must not exceed KDBUS_NAME_MAX_LEN.
+ *  - If @allow_wildcard is true, the name may end in '.*'.
+ *
+ * Return: true if the name is valid, false otherwise.
+ */
+bool kdbus_name_is_valid(const char *p, bool allow_wildcard)
+{
+	bool dot, found_dot = false;
+	const char *q;
+
+	for (dot = true, q = p; *q; q++) {
+		if (*q == '.') {
+			if (dot)
+				return false;
+
+			found_dot = true;
+			dot = true;
+		} else {
+			bool good;
+
+			good = isalpha(*q) || (!dot && isdigit(*q)) ||
+				*q == '_' || *q == '-' ||
+				(allow_wildcard && dot &&
+					*q == '*' && *(q + 1) == '\0');
+
+			if (!good)
+				return false;
+
+			dot = false;
+		}
+	}
+
+	if (q - p > KDBUS_NAME_MAX_LEN)
+		return false;
+
+	if (dot)
+		return false;
+
+	if (!found_dot)
+		return false;
+
+	return true;
+}
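+
+/*
+ * Examples (illustrative only):
+ *
+ *	kdbus_name_is_valid("org.example.app", false)	-> true
+ *	kdbus_name_is_valid("org.example.*", true)	-> true  (wildcard)
+ *	kdbus_name_is_valid("org.example.*", false)	-> false ('*' not allowed)
+ *	kdbus_name_is_valid("org..example", false)	-> false (empty element)
+ *	kdbus_name_is_valid("example", false)		-> false (single element)
+ *	kdbus_name_is_valid("org.3example", false)	-> false (digit after '.')
+ */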
+
+/**
+ * kdbus_cmd_name_acquire() - handle KDBUS_CMD_NAME_ACQUIRE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp)
+{
+	const char *item_name;
+	struct kdbus_cmd *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_NAME, .mandatory = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_NAME_REPLACE_EXISTING |
+				 KDBUS_NAME_ALLOW_REPLACEMENT |
+				 KDBUS_NAME_QUEUE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	item_name = argv[1].item->str;
+	if (!kdbus_name_is_valid(item_name, false)) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = kdbus_name_acquire(conn->ep->bus->name_registry, conn, item_name,
+				 cmd->flags, &cmd->return_flags);
+
+exit:
+	return kdbus_args_clear(&args, ret);
+}
+
+/**
+ * kdbus_cmd_name_release() - handle KDBUS_CMD_NAME_RELEASE
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_cmd *cmd;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+		{ .type = KDBUS_ITEM_NAME, .mandatory = true },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	if (!kdbus_conn_is_ordinary(conn))
+		return -EOPNOTSUPP;
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	ret = kdbus_name_release(conn->ep->bus->name_registry, conn,
+				 argv[1].item->str);
+	return kdbus_args_clear(&args, ret);
+}
+
+static int kdbus_list_write(struct kdbus_conn *conn,
+			    struct kdbus_conn *c,
+			    struct kdbus_pool_slice *slice,
+			    size_t *pos,
+			    struct kdbus_name_owner *o,
+			    bool write)
+{
+	struct kvec kvec[4];
+	size_t cnt = 0;
+	int ret;
+
+	/* info header */
+	struct kdbus_info info = {
+		.size = 0,
+		.id = c->id,
+		.flags = c->flags,
+	};
+
+	/* fake the header of a kdbus_name item */
+	struct {
+		u64 size;
+		u64 type;
+		u64 flags;
+	} h = {};
+
+	if (o && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(),
+						      o->name->name))
+		return 0;
+
+	kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &info.size);
+
+	/* append name */
+	if (o) {
+		size_t slen = strlen(o->name->name) + 1;
+
+		h.size = offsetof(struct kdbus_item, name.name) + slen;
+		h.type = KDBUS_ITEM_OWNED_NAME;
+		h.flags = o->flags;
+
+		kdbus_kvec_set(&kvec[cnt++], &h, sizeof(h), &info.size);
+		kdbus_kvec_set(&kvec[cnt++], o->name->name, slen, &info.size);
+		cnt += !!kdbus_kvec_pad(&kvec[cnt], &info.size);
+	}
+
+	if (write) {
+		ret = kdbus_pool_slice_copy_kvec(slice, *pos, kvec,
+						 cnt, info.size);
+		if (ret < 0)
+			return ret;
+	}
+
+	*pos += info.size;
+	return 0;
+}
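+
+/*
+ * Record layout sketch for kdbus_list_write(): each record starts with a
+ * struct kdbus_info header carrying the connection's unique id and flags. If
+ * a name is reported, the header is followed by one KDBUS_ITEM_OWNED_NAME
+ * item whose item flags hold the owner's KDBUS_NAME_* flags and whose payload
+ * is the NUL-terminated name, padded to an 8-byte boundary. info.size covers
+ * the whole record.
+ */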
+
+static int kdbus_list_all(struct kdbus_conn *conn, u64 flags,
+			  struct kdbus_pool_slice *slice,
+			  size_t *pos, bool write)
+{
+	struct kdbus_conn *c;
+	size_t p = *pos;
+	int ret, i;
+
+	hash_for_each(conn->ep->bus->conn_hash, i, c, hentry) {
+		bool added = false;
+
+		/* skip monitors */
+		if (kdbus_conn_is_monitor(c))
+			continue;
+
+		/* all names the connection owns */
+		if (flags & (KDBUS_LIST_NAMES |
+			     KDBUS_LIST_ACTIVATORS |
+			     KDBUS_LIST_QUEUED)) {
+			struct kdbus_name_owner *o;
+
+			list_for_each_entry(o, &c->names_list, conn_entry) {
+				if (o->flags & KDBUS_NAME_ACTIVATOR) {
+					if (!(flags & KDBUS_LIST_ACTIVATORS))
+						continue;
+
+					ret = kdbus_list_write(conn, c, slice,
+							       &p, o, write);
+					if (ret < 0)
+						return ret;
+
+					added = true;
+				} else if (o->flags & KDBUS_NAME_IN_QUEUE) {
+					if (!(flags & KDBUS_LIST_QUEUED))
+						continue;
+
+					ret = kdbus_list_write(conn, c, slice,
+							       &p, o, write);
+					if (ret < 0)
+						return ret;
+
+					added = true;
+				} else if (flags & KDBUS_LIST_NAMES) {
+					ret = kdbus_list_write(conn, c, slice,
+							       &p, o, write);
+					if (ret < 0)
+						return ret;
+
+					added = true;
+				}
+			}
+		}
+
+		/* nothing added so far, just add the unique ID */
+		if (!added && (flags & KDBUS_LIST_UNIQUE)) {
+			ret = kdbus_list_write(conn, c, slice, &p, NULL, write);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	*pos = p;
+	return 0;
+}
+
+/**
+ * kdbus_cmd_list() - handle KDBUS_CMD_LIST
+ * @conn:		connection to operate on
+ * @argp:		command payload
+ *
+ * Return: >=0 on success, negative error code on failure.
+ */
+int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp)
+{
+	struct kdbus_name_registry *reg = conn->ep->bus->name_registry;
+	struct kdbus_pool_slice *slice = NULL;
+	struct kdbus_cmd_list *cmd;
+	size_t pos, size;
+	int ret;
+
+	struct kdbus_arg argv[] = {
+		{ .type = KDBUS_ITEM_NEGOTIATE },
+	};
+	struct kdbus_args args = {
+		.allowed_flags = KDBUS_FLAG_NEGOTIATE |
+				 KDBUS_LIST_UNIQUE |
+				 KDBUS_LIST_NAMES |
+				 KDBUS_LIST_ACTIVATORS |
+				 KDBUS_LIST_QUEUED,
+		.argv = argv,
+		.argc = ARRAY_SIZE(argv),
+	};
+
+	ret = kdbus_args_parse(&args, argp, &cmd);
+	if (ret != 0)
+		return ret;
+
+	/* lock order: domain -> bus -> ep -> names -> conn */
+	down_read(&reg->rwlock);
+	down_read(&conn->ep->bus->conn_rwlock);
+	down_read(&conn->ep->policy_db.entries_rwlock);
+
+	/* size of records */
+	size = 0;
+	ret = kdbus_list_all(conn, cmd->flags, NULL, &size, false);
+	if (ret < 0)
+		goto exit_unlock;
+
+	if (size == 0) {
+		kdbus_pool_publish_empty(conn->pool, &cmd->offset,
+					 &cmd->list_size);
+	} else {
+		slice = kdbus_pool_slice_alloc(conn->pool, size, false);
+		if (IS_ERR(slice)) {
+			ret = PTR_ERR(slice);
+			slice = NULL;
+			goto exit_unlock;
+		}
+
+		/* copy the records */
+		pos = 0;
+		ret = kdbus_list_all(conn, cmd->flags, slice, &pos, true);
+		if (ret < 0)
+			goto exit_unlock;
+
+		WARN_ON(pos != size);
+		kdbus_pool_slice_publish(slice, &cmd->offset, &cmd->list_size);
+	}
+
+	if (kdbus_member_set_user(&cmd->offset, argp, typeof(*cmd), offset) ||
+	    kdbus_member_set_user(&cmd->list_size, argp,
+				  typeof(*cmd), list_size))
+		ret = -EFAULT;
+
+exit_unlock:
+	up_read(&conn->ep->policy_db.entries_rwlock);
+	up_read(&conn->ep->bus->conn_rwlock);
+	up_read(&reg->rwlock);
+	kdbus_pool_slice_release(slice);
+	return kdbus_args_clear(&args, ret);
+}
diff -Nur linux-4.2/ipc/kdbus/names.h linux-4.2-pck/ipc/kdbus/names.h
--- linux-4.2/ipc/kdbus/names.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/names.h	2015-09-08 00:48:22.235030231 -0300
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_NAMES_H
+#define __KDBUS_NAMES_H
+
+#include <linux/hashtable.h>
+#include <linux/rwsem.h>
+
+struct kdbus_name_entry;
+struct kdbus_name_owner;
+struct kdbus_name_registry;
+
+/**
+ * struct kdbus_name_registry - names registered for a bus
+ * @entries_hash:	Map of entries
+ * @rwlock:		Registry data lock
+ * @name_seq_last:	Last used sequence number to assign to a name entry
+ */
+struct kdbus_name_registry {
+	DECLARE_HASHTABLE(entries_hash, 8);
+	struct rw_semaphore rwlock;
+	u64 name_seq_last;
+};
+
+/**
+ * struct kdbus_name_entry - well-known name entry
+ * @name_id:		sequence number of name entry to be able to uniquely
+ *			identify a name over its registration lifetime
+ * @activator:		activator of this name, or NULL
+ * @queue:		list of queued owners
+ * @hentry:		entry in registry map
+ * @name:		well-known name
+ */
+struct kdbus_name_entry {
+	u64 name_id;
+	struct kdbus_name_owner *activator;
+	struct list_head queue;
+	struct hlist_node hentry;
+	char name[];
+};
+
+/**
+ * struct kdbus_name_owner - owner of a well-known name
+ * @flags:		KDBUS_NAME_* flags of this owner
+ * @conn:		connection owning the name
+ * @name:		name that is owned
+ * @conn_entry:		link into @conn
+ * @name_entry:		link into @name
+ */
+struct kdbus_name_owner {
+	u64 flags;
+	struct kdbus_conn *conn;
+	struct kdbus_name_entry *name;
+	struct list_head conn_entry;
+	struct list_head name_entry;
+};
+
+bool kdbus_name_is_valid(const char *p, bool allow_wildcard);
+
+struct kdbus_name_registry *kdbus_name_registry_new(void);
+void kdbus_name_registry_free(struct kdbus_name_registry *reg);
+
+struct kdbus_name_entry *
+kdbus_name_lookup_unlocked(struct kdbus_name_registry *reg, const char *name);
+
+int kdbus_name_acquire(struct kdbus_name_registry *reg,
+		       struct kdbus_conn *conn, const char *name,
+		       u64 flags, u64 *return_flags);
+void kdbus_name_release_all(struct kdbus_name_registry *reg,
+			    struct kdbus_conn *conn);
+
+int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp);
+int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp);
+
+/**
+ * kdbus_name_get_owner() - get current owner of a name
+ * @name:	name to get current owner of
+ *
+ * This returns a pointer to the current owner of a name (or its activator if
+ * there is no owner). The caller must make sure @name is valid and does not
+ * vanish.
+ *
+ * Return: Pointer to current owner or NULL if there is none.
+ */
+static inline struct kdbus_name_owner *
+kdbus_name_get_owner(struct kdbus_name_entry *name)
+{
+	return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+					name_entry) ? : name->activator;
+}
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/node.c linux-4.2-pck/ipc/kdbus/node.c
--- linux-4.2/ipc/kdbus/node.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/node.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,948 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/kdev_t.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "bus.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "fs.h"
+#include "handle.h"
+#include "node.h"
+#include "util.h"
+
+/**
+ * DOC: kdbus nodes
+ *
+ * Nodes unify lifetime management across exposed kdbus objects and provide a
+ * hierarchy. Each kdbus object, that might be exposed to user-space, has a
+ * kdbus_node object embedded and is linked into the hierarchy. Each node can
+ * have any number (0-n) of child nodes linked. Each child retains a reference
+ * to its parent node. For root-nodes, the parent is NULL.
+ *
+ * Each node object goes through a set of states during its lifetime:
+ *     * NEW
+ *       * LINKED    (can be skipped by NEW->FREED transition)
+ *         * ACTIVE  (can be skipped by LINKED->INACTIVE transition)
+ *       * INACTIVE
+ *       * DRAINED
+ *     * FREED
+ *
+ * Each node is allocated by the caller and initialized via kdbus_node_init().
+ * This never fails and sets the object into state NEW. From now on, ref-counts
+ * on the node manage its lifetime. During init, the ref-count is set to 1. Once
+ * it drops to 0, the node goes to state FREED and the node->free_cb() callback
+ * is called to deallocate any memory.
+ *
+ * After initializing a node, you usually link it into the hierarchy. You need
+ * to provide a parent node and a name. The node will be linked as child to the
+ * parent and a globally unique ID is assigned to the child. The name of the
+ * child must be unique for all children of this parent. Otherwise, linking the
+ * child will fail with -EEXIST.
+ * Note that the child is not marked active, yet. Admittedly, it prevents any
+ * other node from being linked with the same name (thus, it reserves that
+ * name), but any child-lookup (via name or unique ID) will never return this
+ * child unless it has been marked active.
+ *
+ * Once successfully linked, you can use kdbus_node_activate() to activate a
+ * child. This will mark the child active. This state can be skipped by directly
+ * deactivating the child via kdbus_node_deactivate() (see below).
+ * By activating a child, you enable any lookups on this child to succeed from
+ * now on. Furthermore, any code that got its hands on a reference to the node,
+ * can from now on "acquire" the node.
+ *
+ *     Active References (or: 'acquiring' and 'releasing' a node)
+ *     In addition to normal object references, nodes support what we call
+ *     "active references" (a usage sketch follows this comment block). An
+ *     active reference can be acquired via
+ *     kdbus_node_acquire() and released via kdbus_node_release(). A caller
+ *     _must_ own a normal object reference whenever calling those functions.
+ *     Unlike object references, acquiring an active reference can fail (by
+ *     returning 'false' from kdbus_node_acquire()). An active reference can
+ *     only be acquired if the node is marked active. If it is not marked
+ *     active, yet, or if it was already deactivated, no more active references
+ *     can be acquired, ever!
+ *     Active references are used to track tasks working on a node. Whenever a
+ *     task enters kernel-space to perform an action on a node, it acquires an
+ *     active reference, performs the action and releases the reference again.
+ *     While holding an active reference, the node is guaranteed to stay active.
+ *     If the node is deactivated in parallel, the node is marked as
+ *     deactivated, then we wait for all active references to be dropped, before
+ *     we finally proceed with any cleanups. That is, if you hold an active
+ *     reference to a node, any resources that are bound to the "active" state
+ *     are guaranteed to stay accessible until you release your reference.
+ *
+ *     Active-references are very similar to rw-locks, where acquiring a node is
+ *     equal to try-read-lock and releasing to read-unlock. Deactivating a node
+ *     means write-lock and never releasing it again.
+ *     Unlike rw-locks, the 'active reference' concept is more versatile and
+ *     avoids unusual rw-lock usage (never releasing a write-lock..).
+ *
+ *     It is safe to acquire multiple active-references recursively. But you
+ *     need to check the return value of kdbus_node_acquire() on _each_ call. It
+ *     may stop granting references at _any_ time.
+ *
+ *     You're free to perform any operations you want while holding an active
+ *     reference, except sleeping for an indefinite period. Sleeping for a fixed
+ *     amount of time is fine, but you usually should not wait on wait-queues
+ *     without a timeout.
+ *     For example, if you wait for I/O to happen, you should gather all data
+ *     and schedule the I/O operation, then release your active reference and
+ *     wait for it to complete. Then try to acquire a new reference. If it
+ *     fails, perform any cleanup (the node is now dead). Otherwise, you can
+ *     finish your operation.
+ *
+ * All nodes can be deactivated via kdbus_node_deactivate() at any time. You can
+ * call this multiple times, even in parallel or on nodes that were never
+ * linked, and it will just work. Furthermore, all children will be deactivated
+ * recursively as well. If a node is deactivated, there might still be active
+ * references that were acquired before calling kdbus_node_deactivate(). The
+ * owner of an object must call kdbus_node_drain() (which is a superset of
+ * kdbus_node_deactivate()) before dropping their reference. This will
+ * deactivate the node and also synchronously wait for all active references to
+ * be dropped. Hence, once kdbus_node_drain() returns, the node is fully
+ * released and no active references exist, anymore.
+ * kdbus_node_drain() can be called at any time, multiple times, and in
+ * parallel on multiple threads. All calls are synchronized internally and will
+ * return only once the node is fully drained. The only restriction is that you
+ * must not hold an active reference when calling kdbus_node_drain() (unlike
+ * deactivation, which allows the caller to hold an active reference).
+ *
+ * When a node is activated, we acquire a normal object reference to the node.
+ * This reference is dropped after deactivation is fully done (and only if the
+ * node really was activated). This allows callers to link+activate a child node
+ * and then drop all refs. This has the effect that nobody owns a reference to
+ * the node, except for the parent node. Hence, if the parent is deactivated
+ * (and thus all children are deactivated, too), this will automatically
+ * release the child node.
+ *
+ * Currently, nodes provide a bunch of resources that external code can use
+ * directly. This includes:
+ *
+ *     * node->waitq: Each node has its own wait-queue that is used to manage
+ *                    the 'active' state. When a node is deactivated, we wait on
+ *                    this queue until all active refs are dropped. Analogously,
+ *                    when you release an active reference on a deactivated
+ *                    node, and the active ref-count drops to 0, we wake up a
+ *                    single thread on this queue. Furthermore, once the
+ *                    ->release_cb() callback finished, we wake up all waiters.
+ *                    The node-owner is free to re-use this wait-queue for other
+ *                    purposes. As node-management uses this queue only during
+ *                    deactivation, it is usually totally fine to re-use the
+ *                    queue for other, preferably low-overhead, use-cases.
+ *
+ *     * node->type: This field defines the type of the owner of this node. It
+ *                   must be set during node initialization and must remain
+ *                   constant. The node management never looks at this value,
+ *                   but external users might use it to gain access to the owner
+ *                   object of a node.
+ *                   It is totally up to the owner of the node to define what
+ *                   their type means. Usually it means you can access the
+ *                   parent structure via container_of(), as long as you hold an
+ *                   active reference to the node.
+ *
+ *     * node->free_cb:    callback after all references are dropped
+ *       node->release_cb: callback during node deactivation
+ *                         These fields must be set by the node owner during
+ *                         node initialization. They must remain constant. If
+ *                         NULL, they're skipped.
+ *
+ *     * node->mode: filesystem access modes
+ *       node->uid:  filesystem owner uid
+ *       node->gid:  filesystem owner gid
+ *                   These fields must be set by the node owner during node
+ *                   initialization. They must remain constant and may be
+ *                   accessed by other callers to properly initialize
+ *                   filesystem nodes.
+ *
+ *     * node->id: This is an unsigned 32bit integer allocated by an IDA. It is
+ *                 always kept as small as possible during allocation and is
+ *                 globally unique across all nodes allocated by this module. 0
+ *                 is reserved as "not assigned" and is the default.
+ *                 The ID is assigned during kdbus_node_link() and is kept until
+ *                 the object is freed. Thus, the ID surpasses the active
+ *                 lifetime of a node. As long as you hold an object reference
+ *                 to a node (and the node was linked once), the ID is valid and
+ *                 unique.
+ *
+ *     * node->name: name of this node
+ *       node->hash: 31bit hash-value of @name (range [2..INT_MAX-1])
+ *                   These values follow the same lifetime rules as node->id.
+ *                   They're initialized when the node is linked and then remain
+ *                   constant until the last object reference is dropped.
+ *                   Unlike the id, the name is only unique across all siblings
+ *                   and only until the node is deactivated. Currently, the name
+ *                   is even unique if linked but not activated, yet. This might
+ *                   change in the future, though. Code should not rely on this.
+ *
+ *     * node->lock:   lock to protect node->children, node->rb, node->parent
+ *     * node->parent: Reference to parent node. This is set during LINK time
+ *                     and is dropped during destruction. You can freely access
+ *                     this field, but it may be NULL (root node).
+ *     * node->children: rb-tree of all linked children of this node. You must
+ *                       not access this directly, but use one of the iterator
+ *                       or lookup helpers.
+ */
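+
+/*
+ * Active-reference usage sketch (hypothetical caller using the helpers
+ * described above; the error code is chosen for illustration only):
+ *
+ *	if (!kdbus_node_acquire(node))
+ *		return -ESHUTDOWN;	... node is already deactivated ...
+ *
+ *	... work on resources bound to the node's active state ...
+ *
+ *	kdbus_node_release(node);
+ */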
+
+/*
+ * Bias values track states of "active references". They're all negative. If a
+ * node is active, its active-ref-counter is >=0 and tracks all active
+ * references. Once a node is deactivated, we subtract NODE_BIAS. This means the
+ * counter is now negative but still counts the active references. Once it drops
+ * to exactly NODE_BIAS, we know all active references were dropped. Exactly one
+ * thread will change it to NODE_RELEASE now, perform cleanup and then put it
+ * into NODE_DRAINED. Once drained, all other threads that tried deactivating
+ * the node will now be woken up (thus, they wait until the node is fully done).
+ * The initial state during node-setup is NODE_NEW. If a node is directly
+ * deactivated without having ever been active, it is put into
+ * NODE_RELEASE_DIRECT instead of NODE_BIAS. This tracks this one-bit state
+ * across node-deactivation. The task putting it into NODE_RELEASE now knows
+ * whether the node was active before or not.
+ *
+ * Some archs implement atomic_sub(v) with atomic_add(-v), so reserve INT_MIN
+ * to avoid overflows if multiplied by -1.
+ */
+#define KDBUS_NODE_BIAS			(INT_MIN + 5)
+#define KDBUS_NODE_RELEASE_DIRECT	(KDBUS_NODE_BIAS - 1)
+#define KDBUS_NODE_RELEASE		(KDBUS_NODE_BIAS - 2)
+#define KDBUS_NODE_DRAINED		(KDBUS_NODE_BIAS - 3)
+#define KDBUS_NODE_NEW			(KDBUS_NODE_BIAS - 4)
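+
+/*
+ * Summary of the counter values implied by the description above, with 'v'
+ * being the number of currently held active references:
+ *	active:				v		(>= 0)
+ *	deactivated, still draining:	KDBUS_NODE_BIAS + v	(negative)
+ *	all active refs dropped:	KDBUS_NODE_BIAS
+ *	cleanup in progress / done:	KDBUS_NODE_RELEASE / KDBUS_NODE_DRAINED
+ *	newly initialized:		KDBUS_NODE_NEW
+ *	deactivated, never active:	KDBUS_NODE_RELEASE_DIRECT
+ */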
+
+/* global unique ID mapping for kdbus nodes */
+DEFINE_IDA(kdbus_node_ida);
+
+/**
+ * kdbus_node_name_hash() - hash a name
+ * @name:	The string to hash
+ *
+ * This computes the hash of @name. It is guaranteed to be in the range
+ * [2..INT_MAX-1]. The values 0, 1 and INT_MAX are unused as they are reserved
+ * for the filesystem code.
+ *
+ * Return: hash value of the passed string
+ */
+static unsigned int kdbus_node_name_hash(const char *name)
+{
+	unsigned int hash;
+
+	/* reserve hash numbers 0, 1 and >=INT_MAX for magic directories */
+	hash = kdbus_strhash(name) & INT_MAX;
+	if (hash < 2)
+		hash += 2;
+	if (hash >= INT_MAX)
+		hash = INT_MAX - 1;
+
+	return hash;
+}
+
+/**
+ * kdbus_node_name_compare() - compare a name with a node's name
+ * @hash:	hash of the string to compare the node with
+ * @name:	name to compare the node with
+ * @node:	node to compare the name with
+ *
+ * This compares a query string against a kdbus node. If the kdbus node has the
+ * given name, this returns 0. Otherwise, this returns >0 / <0 depending on
+ * whether the query string is greater / less than the node.
+ *
+ * Note: If @node is drained but has the name @name, this returns 1. The
+ *       reason for this is that we treat drained nodes as "renamed". The
+ *       slot of such nodes is no longer occupied and new nodes can claim it.
+ *       Obviously, this has the side-effect that you cannot match drained
+ *       nodes, as they will never return 0 on name-matches. But this is
+ *       intentional, as there is no reason why anyone would ever want to match
+ *       on drained nodes.
+ *
+ * Return: 0 if @name and @hash exactly match the information in @node, or
+ * an integer less than or greater than zero if @name is found, respectively,
+ * to be less than or be greater than the string stored in @node.
+ */
+static int kdbus_node_name_compare(unsigned int hash, const char *name,
+				   const struct kdbus_node *node)
+{
+	int ret;
+
+	if (hash != node->hash)
+		return hash - node->hash;
+
+	ret = strcmp(name, node->name);
+	if (ret != 0)
+		return ret;
+
+	return atomic_read(&node->active) == KDBUS_NODE_DRAINED;
+}
+
+/**
+ * kdbus_node_init() - initialize a kdbus_node
+ * @node:	Pointer to the node to initialize
+ * @type:	The type the node will have (KDBUS_NODE_*)
+ *
+ * The caller is responsible for allocating @node and initializing it to zero.
+ * Once this call returns, you must use the node_ref() and node_unref()
+ * functions to manage this node.
+ */
+void kdbus_node_init(struct kdbus_node *node, unsigned int type)
+{
+	atomic_set(&node->refcnt, 1);
+	mutex_init(&node->lock);
+	node->id = 0;
+	node->type = type;
+	RB_CLEAR_NODE(&node->rb);
+	node->children = RB_ROOT;
+	init_waitqueue_head(&node->waitq);
+	atomic_set(&node->active, KDBUS_NODE_NEW);
+}
+
+/**
+ * kdbus_node_link() - link a node into the nodes system
+ * @node:	Pointer to the node to initialize
+ * @parent:	Pointer to a parent node, may be %NULL
+ * @name:	The name of the node (or NULL if root node)
+ *
+ * This links a node into the hierarchy. This must not be called multiple times.
+ * If @parent is NULL, the node becomes a new root node.
+ *
+ * This call will fail if @name is not unique across all its siblings or if no
+ * ID could be allocated. You must not activate a node if linking failed! It is
+ * safe to deactivate it, though.
+ *
+ * Once you linked a node, you must call kdbus_node_drain() before you drop
+ * the last reference (even if you never activate the node).
+ *
+ * Return: 0 on success. negative error otherwise.
+ */
+int kdbus_node_link(struct kdbus_node *node, struct kdbus_node *parent,
+		    const char *name)
+{
+	int ret;
+
+	if (WARN_ON(node->type != KDBUS_NODE_DOMAIN && !parent))
+		return -EINVAL;
+
+	if (WARN_ON(parent && !name))
+		return -EINVAL;
+
+	if (name) {
+		node->name = kstrdup(name, GFP_KERNEL);
+		if (!node->name)
+			return -ENOMEM;
+
+		node->hash = kdbus_node_name_hash(name);
+	}
+
+	ret = ida_simple_get(&kdbus_node_ida, 1, 0, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	node->id = ret;
+	ret = 0;
+
+	if (parent) {
+		struct rb_node **n, *prev;
+
+		if (!kdbus_node_acquire(parent))
+			return -ESHUTDOWN;
+
+		mutex_lock(&parent->lock);
+
+		n = &parent->children.rb_node;
+		prev = NULL;
+
+		while (*n) {
+			struct kdbus_node *pos;
+			int result;
+
+			pos = kdbus_node_from_rb(*n);
+			prev = *n;
+			result = kdbus_node_name_compare(node->hash,
+							 node->name,
+							 pos);
+			if (result == 0) {
+				ret = -EEXIST;
+				goto exit_unlock;
+			}
+
+			if (result < 0)
+				n = &pos->rb.rb_left;
+			else
+				n = &pos->rb.rb_right;
+		}
+
+		/* add new node and rebalance the tree */
+		rb_link_node(&node->rb, prev, n);
+		rb_insert_color(&node->rb, &parent->children);
+		node->parent = kdbus_node_ref(parent);
+
+exit_unlock:
+		mutex_unlock(&parent->lock);
+		kdbus_node_release(parent);
+	}
+
+	return ret;
+}
+
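+/*
+ * Typical owner-side lifecycle (illustrative sketch only, error handling
+ * omitted; "mybus" and the surrounding variables are hypothetical):
+ *
+ *     kdbus_node_init(node, KDBUS_NODE_BUS);
+ *     ret = kdbus_node_link(node, parent, "mybus");
+ *     ... on success, make the node visible ...
+ *     kdbus_node_activate(node);
+ *     ...
+ *     ... on teardown ...
+ *     kdbus_node_drain(node);
+ *     kdbus_node_unref(node);
+ */
+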
+/**
+ * kdbus_node_ref() - Acquire object reference
+ * @node:	node to acquire reference to (or NULL)
+ *
+ * This acquires a new reference to @node. You must already own a reference when
+ * calling this!
+ * If @node is NULL, this is a no-op.
+ *
+ * Return: @node is returned
+ */
+struct kdbus_node *kdbus_node_ref(struct kdbus_node *node)
+{
+	if (node)
+		atomic_inc(&node->refcnt);
+	return node;
+}
+
+/**
+ * kdbus_node_unref() - Drop object reference
+ * @node:	node to drop reference to (or NULL)
+ *
+ * This drops an object reference to @node. You must not access the node if you
+ * no longer own a reference.
+ * If the ref-count drops to 0, the object will be destroyed (->free_cb will be
+ * called).
+ *
+ * If you linked or activated the node, you must deactivate the node before you
+ * drop your last reference! If you didn't link or activate the node, you can
+ * drop any reference you want.
+ *
+ * Note that this calls into ->free_cb() and thus _might_ sleep. The ->free_cb()
+ * callbacks must not acquire any outer locks, though. So you can safely drop
+ * references while holding locks (apart from node->parent->lock).
+ *
+ * If @node is NULL, this is a no-op.
+ *
+ * Return: This always returns NULL
+ */
+struct kdbus_node *kdbus_node_unref(struct kdbus_node *node)
+{
+	if (node && atomic_dec_and_test(&node->refcnt)) {
+		struct kdbus_node safe = *node;
+
+		WARN_ON(atomic_read(&node->active) != KDBUS_NODE_DRAINED);
+
+		if (node->parent) {
+			mutex_lock(&node->parent->lock);
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				rb_erase(&node->rb,
+					 &node->parent->children);
+				RB_CLEAR_NODE(&node->rb);
+			}
+			mutex_unlock(&node->parent->lock);
+		}
+
+		if (node->free_cb)
+			node->free_cb(node);
+		if (safe.id > 0)
+			ida_simple_remove(&kdbus_node_ida, safe.id);
+
+		kfree(safe.name);
+		kdbus_node_unref(safe.parent);
+	}
+
+	return NULL;
+}
+
+/**
+ * kdbus_node_is_active() - test whether a node is active
+ * @node:	node to test
+ *
+ * This checks whether @node is active. That means, @node was linked and
+ * activated by the node owner and hasn't been deactivated, yet. If, and only
+ * if, a node is active, kdbus_node_acquire() will be able to acquire active
+ * references.
+ *
+ * Note that this function does not give any lifetime guarantees. After this
+ * call returns, the node might be deactivated immediately. Normally, what you
+ * want is to acquire a real active reference via kdbus_node_acquire().
+ *
+ * Return: true if @node is active, false otherwise
+ */
+bool kdbus_node_is_active(struct kdbus_node *node)
+{
+	return atomic_read(&node->active) >= 0;
+}
+
+/**
+ * kdbus_node_is_deactivated() - test whether a node was already deactivated
+ * @node:	node to test
+ *
+ * This checks whether kdbus_node_deactivate() was called on @node. Note that
+ * this might be true even if you never deactivated the node directly, but only
+ * one of its ancestors.
+ *
+ * Note that even if this returns 'false', the node might get deactivated
+ * immediately after the call returns.
+ *
+ * Return: true if @node was already deactivated, false if not
+ */
+bool kdbus_node_is_deactivated(struct kdbus_node *node)
+{
+	int v;
+
+	v = atomic_read(&node->active);
+	return v != KDBUS_NODE_NEW && v < 0;
+}
+
+/**
+ * kdbus_node_activate() - activate a node
+ * @node:	node to activate
+ *
+ * This marks @node as active if, and only if, the node has neither been
+ * activated nor deactivated yet, and the parent is still active. Any but the
+ * first call to kdbus_node_activate() is a no-op.
+ * If you called kdbus_node_deactivate() before, then even the first call to
+ * kdbus_node_activate() will be a no-op.
+ *
+ * This call doesn't give any lifetime guarantees. The node might get
+ * deactivated immediately after this call returns. Or the parent might already
+ * be deactivated, which will make this call a no-op.
+ *
+ * If this call successfully activated a node, it will take an object reference
+ * to it. This reference is dropped after the node is deactivated. Therefore,
+ * the object owner can safely drop their reference to @node iff they know that
+ * its parent node will get deactivated at some point. Once the parent node is
+ * deactivated, it will deactivate all its children and thus drop this reference
+ * again.
+ *
+ * Return: True if this call successfully activated the node, otherwise false.
+ *         Note that this might return false, even if the node is still active
+ *         (e.g., if you called this a second time).
+ */
+bool kdbus_node_activate(struct kdbus_node *node)
+{
+	bool res = false;
+
+	mutex_lock(&node->lock);
+	if (atomic_read(&node->active) == KDBUS_NODE_NEW) {
+		atomic_sub(KDBUS_NODE_NEW, &node->active);
+		/* activated nodes have ref +1 */
+		kdbus_node_ref(node);
+		res = true;
+	}
+	mutex_unlock(&node->lock);
+
+	return res;
+}
+
+/**
+ * kdbus_node_recurse_unlock() - advance iterator on a tree
+ * @start:	node at which the iteration started
+ * @node:	previously visited node
+ *
+ * This helper advances an iterator by one, when traversing a node tree. It is
+ * supposed to be used like this:
+ *
+ *     struct kdbus_node *n;
+ *
+ *     n = start;
+ *     while (n) {
+ *             mutex_lock(&n->lock);
+ *             ... visit @n ...
+ *             n = kdbus_node_recurse_unlock(start, n);
+ *     }
+ *
+ * This helper takes as input the start-node of the iteration and the current
+ * position. It returns a pointer to the next node to visit. The caller must
+ * hold a reference to @start during the whole iteration. Furthermore, @node
+ * must be locked when entering this helper. On return, the lock is released.
+ *
+ * The order of visit is pre-order traversal.
+ *
+ * If @node is deactivated before recursing its children, then it is guaranteed
+ * that all children will be visited. If @node is still active, new nodes might
+ * be inserted during traversal, and thus might be missed.
+ *
+ * Also note that the node-locks are released while traversing children. You
+ * must not rely on the locks to be held during the whole traversal. Each node
+ * that is visited is pinned by this helper, so the caller can rely on owning a
+ * reference. It is dropped, once all of the children of the node have been
+ * visited (recursively).
+ *
+ * You *must not* bail out of a traversal early, otherwise you'll leak
+ * ref-counts to all nodes in the current depth-path.
+ *
+ * Return: Reference to next node, or NULL.
+ */
+static struct kdbus_node *kdbus_node_recurse_unlock(struct kdbus_node *start,
+						    struct kdbus_node *node)
+{
+	struct kdbus_node *t, *prev = NULL;
+	struct rb_node *rb;
+
+	lockdep_assert_held(&node->lock);
+
+	rb = rb_first(&node->children);
+	if (!rb) {
+		do {
+			mutex_unlock(&node->lock);
+			kdbus_node_unref(prev);
+
+			if (node == start)
+				return NULL;
+
+			prev = node;
+			node = node->parent;
+
+			mutex_lock(&node->lock);
+			rb = rb_next(&prev->rb);
+		} while (!rb);
+	}
+
+	t = kdbus_node_ref(kdbus_node_from_rb(rb));
+	mutex_unlock(&node->lock);
+	kdbus_node_unref(prev);
+	return t;
+}
+
+/**
+ * kdbus_node_deactivate() - deactivate a node
+ * @node:	node to deactivate
+ *
+ * This recursively deactivates the passed node and all its children. The nodes
+ * are marked as deactivated, but they're not drained. Hence, even after this
+ * call returns, there might still be someone holding an active reference to
+ * any of the nodes. However, no new active references can be acquired after
+ * this returns.
+ *
+ * It is safe to call this multiple times (even in parallel). Each call is
+ * guaranteed to only return after _all_ nodes have been deactivated.
+ */
+void kdbus_node_deactivate(struct kdbus_node *node)
+{
+	struct kdbus_node *pos;
+	int v;
+
+	pos = node;
+	while (pos) {
+		mutex_lock(&pos->lock);
+
+		/*
+		 * Add BIAS to pos->active to mark it as inactive. If it was
+		 * never active before, immediately mark it as RELEASE_DIRECT
+		 * so that this case can be detected later on.
+		 * If the node was already deactivated, make sure to still
+		 * recurse into the children. Otherwise, we might return before
+		 * a racing thread finished deactivating all children. But we
+		 * want to guarantee that the whole tree is deactivated once
+		 * this returns.
+		 */
+		v = atomic_read(&pos->active);
+		if (v >= 0)
+			atomic_add_return(KDBUS_NODE_BIAS, &pos->active);
+		else if (v == KDBUS_NODE_NEW)
+			atomic_set(&pos->active, KDBUS_NODE_RELEASE_DIRECT);
+
+		pos = kdbus_node_recurse_unlock(node, pos);
+	}
+}
+
+/**
+ * kdbus_node_drain() - drain a node
+ * @node:	node to drain
+ *
+ * This function recursively deactivates this node and all its children and
+ * then waits for all active references to be dropped. This function is a
+ * superset of kdbus_node_deactivate(), as it additionally drains all nodes. It
+ * returns only once all children and the node itself were recursively drained
+ * (even if you call this function multiple times in parallel).
+ *
+ * It is safe to call this function on _any_ node that was initialized _any_
+ * number of times.
+ *
+ * This call may sleep, as it waits for all active references to be dropped.
+ */
+void kdbus_node_drain(struct kdbus_node *node)
+{
+	struct kdbus_node *pos;
+	int v;
+
+	kdbus_node_deactivate(node);
+
+	pos = node;
+	while (pos) {
+		/* wait until all active references were dropped */
+		wait_event(pos->waitq,
+			   atomic_read(&pos->active) <= KDBUS_NODE_BIAS);
+
+		/* mark object as RELEASE */
+		mutex_lock(&pos->lock);
+		v = atomic_read(&pos->active);
+		if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT)
+			atomic_set(&pos->active, KDBUS_NODE_RELEASE);
+		mutex_unlock(&pos->lock);
+
+		/*
+		 * If this is the thread that marked the object as RELEASE, we
+		 * perform the actual release. Otherwise, we wait until the
+		 * release is done and the node is marked as DRAINED.
+		 */
+		if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT) {
+			if (pos->release_cb)
+				pos->release_cb(pos, v == KDBUS_NODE_BIAS);
+
+			/* mark as DRAINED */
+			atomic_set(&pos->active, KDBUS_NODE_DRAINED);
+			wake_up_all(&pos->waitq);
+
+			/* drop VFS cache */
+			kdbus_fs_flush(pos);
+
+			/*
+			 * If the node was activated and someone subtracted BIAS
+			 * from it to deactivate it, we, and only we, are
+			 * responsible for releasing the extra ref-count that was
+			 * taken once in kdbus_node_activate().
+			 * If the node was never activated, no-one ever
+			 * subtracted BIAS, but instead skipped that state and
+			 * immediately went to NODE_RELEASE_DIRECT. In that case
+			 * we must not drop the reference.
+			 */
+			if (v == KDBUS_NODE_BIAS)
+				kdbus_node_unref(pos);
+		} else {
+			/* wait until object is DRAINED */
+			wait_event(pos->waitq,
+			    atomic_read(&pos->active) == KDBUS_NODE_DRAINED);
+		}
+
+		mutex_lock(&pos->lock);
+		pos = kdbus_node_recurse_unlock(node, pos);
+	}
+}
+
+/**
+ * kdbus_node_acquire() - Acquire an active ref on a node
+ * @node:	The node
+ *
+ * This acquires an active-reference to @node. This will only succeed if the
+ * node is active. You must release this active reference via
+ * kdbus_node_release() again.
+ *
+ * See the introduction to "active references" for more details.
+ *
+ * Return: %true if @node was non-NULL and active
+ */
+bool kdbus_node_acquire(struct kdbus_node *node)
+{
+	return node && atomic_inc_unless_negative(&node->active);
+}
+
+/**
+ * kdbus_node_release() - Release an active ref on a node
+ * @node:	The node
+ *
+ * This releases an active reference that was previously acquired via
+ * kdbus_node_acquire(). See kdbus_node_acquire() for details.
+ */
+void kdbus_node_release(struct kdbus_node *node)
+{
+	if (node && atomic_dec_return(&node->active) == KDBUS_NODE_BIAS)
+		wake_up(&node->waitq);
+}
+
+/**
+ * kdbus_node_find_child() - Find child by name
+ * @node:	parent node to search through
+ * @name:	name of child node
+ *
+ * This searches through all children of @node for a child-node with name @name.
+ * If not found, or if the child is deactivated, NULL is returned. Otherwise,
+ * the child is acquired and a new reference is returned.
+ *
+ * If you're done with the child, you need to release it and drop your
+ * reference.
+ *
+ * This function does not acquire the parent node. However, if the parent was
+ * already deactivated, then kdbus_node_deactivate() will, at some point, also
+ * deactivate the child. Therefore, we can rely on the explicit ordering during
+ * deactivation.
+ *
+ * Return: Reference to acquired child node, or NULL if not found / not active.
+ */
+struct kdbus_node *kdbus_node_find_child(struct kdbus_node *node,
+					 const char *name)
+{
+	struct kdbus_node *child;
+	struct rb_node *rb;
+	unsigned int hash;
+	int ret;
+
+	hash = kdbus_node_name_hash(name);
+
+	mutex_lock(&node->lock);
+	rb = node->children.rb_node;
+	while (rb) {
+		child = kdbus_node_from_rb(rb);
+		ret = kdbus_node_name_compare(hash, name, child);
+		if (ret < 0)
+			rb = rb->rb_left;
+		else if (ret > 0)
+			rb = rb->rb_right;
+		else
+			break;
+	}
+	if (rb && kdbus_node_acquire(child))
+		kdbus_node_ref(child);
+	else
+		child = NULL;
+	mutex_unlock(&node->lock);
+
+	return child;
+}
+
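+/*
+ * Usage sketch (illustrative only; "custom-ep" is a hypothetical name): a
+ * child returned by kdbus_node_find_child() is both acquired and referenced,
+ * so callers pair it with a release and an unref:
+ *
+ *     child = kdbus_node_find_child(parent, "custom-ep");
+ *     if (child) {
+ *             ... use @child ...
+ *             kdbus_node_release(child);
+ *             kdbus_node_unref(child);
+ *     }
+ */
+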
+static struct kdbus_node *node_find_closest_unlocked(struct kdbus_node *node,
+						     unsigned int hash,
+						     const char *name)
+{
+	struct kdbus_node *n, *pos = NULL;
+	struct rb_node *rb;
+	int res;
+
+	/*
+	 * Find the closest child with ``node->hash >= hash'', or, if @name is
+	 * valid, ``node->name >= name'' (where '>=' is the lex. order).
+	 */
+
+	rb = node->children.rb_node;
+	while (rb) {
+		n = kdbus_node_from_rb(rb);
+
+		if (name)
+			res = kdbus_node_name_compare(hash, name, n);
+		else
+			res = hash - n->hash;
+
+		if (res <= 0) {
+			rb = rb->rb_left;
+			pos = n;
+		} else { /* ``hash > n->hash'', ``name > n->name'' */
+			rb = rb->rb_right;
+		}
+	}
+
+	return pos;
+}
+
+/**
+ * kdbus_node_find_closest() - Find closest child-match
+ * @node:	parent node to search through
+ * @hash:	hash value to find closest match for
+ *
+ * Find the closest child of @node with a hash greater than or equal to @hash.
+ * The closest match is the left-most child of @node with this property. That
+ * is, it is the first child with that hash that kdbus_node_next_child() would
+ * return if you iterated the whole parent node.
+ *
+ * Return: Reference to acquired child, or NULL if none found.
+ */
+struct kdbus_node *kdbus_node_find_closest(struct kdbus_node *node,
+					   unsigned int hash)
+{
+	struct kdbus_node *child;
+	struct rb_node *rb;
+
+	mutex_lock(&node->lock);
+
+	child = node_find_closest_unlocked(node, hash, NULL);
+	while (child && !kdbus_node_acquire(child)) {
+		rb = rb_next(&child->rb);
+		if (rb)
+			child = kdbus_node_from_rb(rb);
+		else
+			child = NULL;
+	}
+	kdbus_node_ref(child);
+
+	mutex_unlock(&node->lock);
+
+	return child;
+}
+
+/**
+ * kdbus_node_next_child() - Acquire next child
+ * @node:	parent node
+ * @prev:	previous child-node position or NULL
+ *
+ * This function returns a reference to the next active child of @node, after
+ * the passed position @prev. If @prev is NULL, a reference to the first active
+ * child is returned. If no more active children are found, NULL is returned.
+ *
+ * This function acquires the next child it returns. If you're done with the
+ * returned pointer, you need to release _and_ unref it.
+ *
+ * The passed in pointer @prev is not modified by this function, and it does
+ * *not* have to be active. If @prev was acquired via different means, or if it
+ * was unlinked from its parent before you pass it in, then this iterator will
+ * still return the next active child (it will have to search through the
+ * rb-tree based on the node-name, though).
+ * However, @prev must not be linked to a different parent than @node!
+ *
+ * Return: Reference to next acquired child, or NULL if at the end.
+ */
+struct kdbus_node *kdbus_node_next_child(struct kdbus_node *node,
+					 struct kdbus_node *prev)
+{
+	struct kdbus_node *pos = NULL;
+	struct rb_node *rb;
+
+	mutex_lock(&node->lock);
+
+	if (!prev) {
+		/*
+		 * New iteration; find first node in rb-tree and try to acquire
+		 * it. If we got it, directly return it as first element.
+		 * Otherwise, the loop below will find the next active node.
+		 */
+		rb = rb_first(&node->children);
+		if (!rb)
+			goto exit;
+		pos = kdbus_node_from_rb(rb);
+		if (kdbus_node_acquire(pos))
+			goto exit;
+	} else {
+		/*
+		 * The current iterator is still linked to the parent. Set it
+		 * as current position and use the loop below to find the next
+		 * active element.
+		 */
+		pos = prev;
+	}
+
+	/* @pos was already returned or is inactive; find next active node */
+	do {
+		rb = rb_next(&pos->rb);
+		if (rb)
+			pos = kdbus_node_from_rb(rb);
+		else
+			pos = NULL;
+	} while (pos && !kdbus_node_acquire(pos));
+
+exit:
+	/* @pos is NULL or acquired. Take ref if non-NULL and return it */
+	kdbus_node_ref(pos);
+	mutex_unlock(&node->lock);
+	return pos;
+}
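+
+/*
+ * Iteration sketch (illustrative only): walk all active children of @parent,
+ * dropping the previous position once the next one has been fetched:
+ *
+ *     struct kdbus_node *pos, *prev = NULL;
+ *
+ *     while ((pos = kdbus_node_next_child(parent, prev))) {
+ *             ... visit @pos ...
+ *             kdbus_node_release(prev);
+ *             kdbus_node_unref(prev);
+ *             prev = pos;
+ *     }
+ *     kdbus_node_release(prev);
+ *     kdbus_node_unref(prev);
+ */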
diff -Nur linux-4.2/ipc/kdbus/node.h linux-4.2-pck/ipc/kdbus/node.h
--- linux-4.2/ipc/kdbus/node.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/node.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_NODE_H
+#define __KDBUS_NODE_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+
+struct kdbus_node;
+
+enum kdbus_node_type {
+	KDBUS_NODE_DOMAIN,
+	KDBUS_NODE_CONTROL,
+	KDBUS_NODE_BUS,
+	KDBUS_NODE_ENDPOINT,
+};
+
+typedef void (*kdbus_node_free_t) (struct kdbus_node *node);
+typedef void (*kdbus_node_release_t) (struct kdbus_node *node, bool was_active);
+
+struct kdbus_node {
+	struct mutex lock;
+	atomic_t refcnt;
+	atomic_t active;
+	wait_queue_head_t waitq;
+
+	/* static members */
+	unsigned int type;
+	kdbus_node_free_t free_cb;
+	kdbus_node_release_t release_cb;
+	umode_t mode;
+	kuid_t uid;
+	kgid_t gid;
+
+	/* valid once linked */
+	char *name;
+	unsigned int hash;
+	unsigned int id;
+	struct kdbus_node *parent; /* may be NULL */
+	struct rb_node rb;
+
+	/* dynamic list of children */
+	struct rb_root children;
+};
+
+#define kdbus_node_from_rb(_node) rb_entry((_node), struct kdbus_node, rb)
+
+extern struct ida kdbus_node_ida;
+
+void kdbus_node_init(struct kdbus_node *node, unsigned int type);
+
+int kdbus_node_link(struct kdbus_node *node, struct kdbus_node *parent,
+		    const char *name);
+
+struct kdbus_node *kdbus_node_ref(struct kdbus_node *node);
+struct kdbus_node *kdbus_node_unref(struct kdbus_node *node);
+
+bool kdbus_node_is_active(struct kdbus_node *node);
+bool kdbus_node_is_deactivated(struct kdbus_node *node);
+bool kdbus_node_activate(struct kdbus_node *node);
+void kdbus_node_deactivate(struct kdbus_node *node);
+void kdbus_node_drain(struct kdbus_node *node);
+
+bool kdbus_node_acquire(struct kdbus_node *node);
+void kdbus_node_release(struct kdbus_node *node);
+
+struct kdbus_node *kdbus_node_find_child(struct kdbus_node *node,
+					 const char *name);
+struct kdbus_node *kdbus_node_find_closest(struct kdbus_node *node,
+					   unsigned int hash);
+struct kdbus_node *kdbus_node_next_child(struct kdbus_node *node,
+					 struct kdbus_node *prev);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/notify.c linux-4.2-pck/ipc/kdbus/notify.c
--- linux-4.2/ipc/kdbus/notify.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/notify.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "domain.h"
+#include "endpoint.h"
+#include "item.h"
+#include "message.h"
+#include "notify.h"
+
+static inline void kdbus_notify_add_tail(struct kdbus_staging *staging,
+					 struct kdbus_bus *bus)
+{
+	spin_lock(&bus->notify_lock);
+	list_add_tail(&staging->notify_entry, &bus->notify_list);
+	spin_unlock(&bus->notify_lock);
+}
+
+static int kdbus_notify_reply(struct kdbus_bus *bus, u64 id,
+			      u64 cookie, u64 msg_type)
+{
+	struct kdbus_staging *s;
+
+	s = kdbus_staging_new_kernel(bus, id, cookie, 0, msg_type);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	kdbus_notify_add_tail(s, bus);
+	return 0;
+}
+
+/**
+ * kdbus_notify_reply_timeout() - queue a timeout reply
+ * @bus:		Bus which queues the messages
+ * @id:			The destination's connection ID
+ * @cookie:		The cookie to set in the reply.
+ *
+ * Queues a message that has a KDBUS_ITEM_REPLY_TIMEOUT item attached.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int kdbus_notify_reply_timeout(struct kdbus_bus *bus, u64 id, u64 cookie)
+{
+	return kdbus_notify_reply(bus, id, cookie, KDBUS_ITEM_REPLY_TIMEOUT);
+}
+
+/**
+ * kdbus_notify_reply_dead() - queue a 'dead' reply
+ * @bus:		Bus which queues the messages
+ * @id:			The destination's connection ID
+ * @cookie:		The cookie to set in the reply.
+ *
+ * Queues a message that has a KDBUS_ITEM_REPLY_DEAD item attached.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int kdbus_notify_reply_dead(struct kdbus_bus *bus, u64 id, u64 cookie)
+{
+	return kdbus_notify_reply(bus, id, cookie, KDBUS_ITEM_REPLY_DEAD);
+}
+
+/**
+ * kdbus_notify_name_change() - queue a notification about a name owner change
+ * @bus:		Bus which queues the messages
+ * @type:		The type of the notification; KDBUS_ITEM_NAME_ADD,
+ *			KDBUS_ITEM_NAME_CHANGE or KDBUS_ITEM_NAME_REMOVE
+ * @old_id:		The id of the connection that used to own the name
+ * @new_id:		The id of the new owner connection
+ * @old_flags:		The flags to pass in the KDBUS_ITEM flags field for
+ *			the old owner
+ * @new_flags:		The flags to pass in the KDBUS_ITEM flags field for
+ *			the new owner
+ * @name:		The name that was removed or assigned to a new owner
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int kdbus_notify_name_change(struct kdbus_bus *bus, u64 type,
+			     u64 old_id, u64 new_id,
+			     u64 old_flags, u64 new_flags,
+			     const char *name)
+{
+	size_t name_len, extra_size;
+	struct kdbus_staging *s;
+
+	name_len = strlen(name) + 1;
+	extra_size = sizeof(struct kdbus_notify_name_change) + name_len;
+
+	s = kdbus_staging_new_kernel(bus, KDBUS_DST_ID_BROADCAST, 0,
+				     extra_size, type);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	s->notify->name_change.old_id.id = old_id;
+	s->notify->name_change.old_id.flags = old_flags;
+	s->notify->name_change.new_id.id = new_id;
+	s->notify->name_change.new_id.flags = new_flags;
+	memcpy(s->notify->name_change.name, name, name_len);
+
+	kdbus_notify_add_tail(s, bus);
+	return 0;
+}
+
+/**
+ * kdbus_notify_id_change() - queue a notification about a unique ID change
+ * @bus:		Bus which queues the messages
+ * @type:		The type of the notification; KDBUS_ITEM_ID_ADD or
+ *			KDBUS_ITEM_ID_REMOVE
+ * @id:			The id of the connection that was added or removed
+ * @flags:		The flags to pass in the KDBUS_ITEM flags field
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int kdbus_notify_id_change(struct kdbus_bus *bus, u64 type, u64 id, u64 flags)
+{
+	struct kdbus_staging *s;
+	size_t extra_size;
+
+	extra_size = sizeof(struct kdbus_notify_id_change);
+	s = kdbus_staging_new_kernel(bus, KDBUS_DST_ID_BROADCAST, 0,
+				     extra_size, type);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	s->notify->id_change.id = id;
+	s->notify->id_change.flags = flags;
+
+	kdbus_notify_add_tail(s, bus);
+	return 0;
+}
+
+/**
+ * kdbus_notify_flush() - send a list of collected messages
+ * @bus:		Bus which queues the messages
+ *
+ * The list is empty after sending the messages.
+ */
+void kdbus_notify_flush(struct kdbus_bus *bus)
+{
+	LIST_HEAD(notify_list);
+	struct kdbus_staging *s, *tmp;
+
+	mutex_lock(&bus->notify_flush_lock);
+	down_read(&bus->name_registry->rwlock);
+
+	spin_lock(&bus->notify_lock);
+	list_splice_init(&bus->notify_list, &notify_list);
+	spin_unlock(&bus->notify_lock);
+
+	list_for_each_entry_safe(s, tmp, &notify_list, notify_entry) {
+		if (s->msg->dst_id != KDBUS_DST_ID_BROADCAST) {
+			struct kdbus_conn *conn;
+
+			conn = kdbus_bus_find_conn_by_id(bus, s->msg->dst_id);
+			if (conn) {
+				kdbus_bus_eavesdrop(bus, NULL, s);
+				kdbus_conn_entry_insert(NULL, conn, s, NULL,
+							NULL);
+				kdbus_conn_unref(conn);
+			}
+		} else {
+			kdbus_bus_broadcast(bus, NULL, s);
+		}
+
+		list_del(&s->notify_entry);
+		kdbus_staging_free(s);
+	}
+
+	up_read(&bus->name_registry->rwlock);
+	mutex_unlock(&bus->notify_flush_lock);
+}
+
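+/*
+ * Illustrative pattern (not taken from a specific caller): notifications are
+ * queued on the bus while the triggering operation runs and flushed in one
+ * batch afterwards ("conn_id" and "flags" are hypothetical values):
+ *
+ *     kdbus_notify_id_change(bus, KDBUS_ITEM_ID_ADD, conn_id, flags);
+ *     ...
+ *     kdbus_notify_flush(bus);
+ */
+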
+/**
+ * kdbus_notify_free() - free a list of collected messages
+ * @bus:		Bus which queues the messages
+ */
+void kdbus_notify_free(struct kdbus_bus *bus)
+{
+	struct kdbus_staging *s, *tmp;
+
+	list_for_each_entry_safe(s, tmp, &bus->notify_list, notify_entry) {
+		list_del(&s->notify_entry);
+		kdbus_staging_free(s);
+	}
+}
diff -Nur linux-4.2/ipc/kdbus/notify.h linux-4.2-pck/ipc/kdbus/notify.h
--- linux-4.2/ipc/kdbus/notify.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/notify.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_NOTIFY_H
+#define __KDBUS_NOTIFY_H
+
+struct kdbus_bus;
+
+int kdbus_notify_id_change(struct kdbus_bus *bus, u64 type, u64 id, u64 flags);
+int kdbus_notify_reply_timeout(struct kdbus_bus *bus, u64 id, u64 cookie);
+int kdbus_notify_reply_dead(struct kdbus_bus *bus, u64 id, u64 cookie);
+int kdbus_notify_name_change(struct kdbus_bus *bus, u64 type,
+			     u64 old_id, u64 new_id,
+			     u64 old_flags, u64 new_flags,
+			     const char *name);
+void kdbus_notify_flush(struct kdbus_bus *bus);
+void kdbus_notify_free(struct kdbus_bus *bus);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/policy.c linux-4.2-pck/ipc/kdbus/policy.c
--- linux-4.2/ipc/kdbus/policy.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/policy.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,489 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "domain.h"
+#include "item.h"
+#include "names.h"
+#include "policy.h"
+
+#define KDBUS_POLICY_HASH_SIZE	64
+
+/**
+ * struct kdbus_policy_db_entry_access - a database entry access item
+ * @type:		One of KDBUS_POLICY_ACCESS_* types
+ * @access:		Access to grant. One of KDBUS_POLICY_*
+ * @uid:		For KDBUS_POLICY_ACCESS_USER, the global uid
+ * @gid:		For KDBUS_POLICY_ACCESS_GROUP, the global gid
+ * @list:		List entry item for the entry's list
+ *
+ * This is the internal version of struct kdbus_policy_db_access.
+ */
+struct kdbus_policy_db_entry_access {
+	u8 type;		/* USER, GROUP, WORLD */
+	u8 access;		/* OWN, TALK, SEE */
+	union {
+		kuid_t uid;	/* global uid */
+		kgid_t gid;	/* global gid */
+	};
+	struct list_head list;
+};
+
+/**
+ * struct kdbus_policy_db_entry - a policy database entry
+ * @name:		The name to match the policy entry against
+ * @hentry:		The hash entry for the database's entries_hash
+ * @access_list:	List head for keeping tracks of the entry's
+ *			access items.
+ * @owner:		The owner of this entry. Can be a kdbus_conn or
+ *			a kdbus_ep object.
+ * @wildcard:		The name is a wildcard, i.e. one ending in '.*'
+ */
+struct kdbus_policy_db_entry {
+	char *name;
+	struct hlist_node hentry;
+	struct list_head access_list;
+	const void *owner;
+	bool wildcard:1;
+};
+
+static void kdbus_policy_entry_free(struct kdbus_policy_db_entry *e)
+{
+	struct kdbus_policy_db_entry_access *a, *tmp;
+
+	list_for_each_entry_safe(a, tmp, &e->access_list, list) {
+		list_del(&a->list);
+		kfree(a);
+	}
+
+	kfree(e->name);
+	kfree(e);
+}
+
+static unsigned int kdbus_strnhash(const char *str, size_t len)
+{
+	unsigned long hash = init_name_hash();
+
+	while (len--)
+		hash = partial_name_hash(*str++, hash);
+
+	return end_name_hash(hash);
+}
+
+static const struct kdbus_policy_db_entry *
+kdbus_policy_lookup(struct kdbus_policy_db *db, const char *name, u32 hash)
+{
+	struct kdbus_policy_db_entry *e;
+	const char *dot;
+	size_t len;
+
+	/* find exact match */
+	hash_for_each_possible(db->entries_hash, e, hentry, hash)
+		if (strcmp(e->name, name) == 0 && !e->wildcard)
+			return e;
+
+	/* find wildcard match */
+
+	dot = strrchr(name, '.');
+	if (!dot)
+		return NULL;
+
+	len = dot - name;
+	hash = kdbus_strnhash(name, len);
+
+	hash_for_each_possible(db->entries_hash, e, hentry, hash)
+		if (e->wildcard && !strncmp(e->name, name, len) &&
+		    !e->name[len])
+			return e;
+
+	return NULL;
+}
+
+/**
+ * kdbus_policy_db_clear - release all memory from a policy db
+ * @db:		The policy database
+ */
+void kdbus_policy_db_clear(struct kdbus_policy_db *db)
+{
+	struct kdbus_policy_db_entry *e;
+	struct hlist_node *tmp;
+	unsigned int i;
+
+	/* purge entries */
+	down_write(&db->entries_rwlock);
+	hash_for_each_safe(db->entries_hash, i, tmp, e, hentry) {
+		hash_del(&e->hentry);
+		kdbus_policy_entry_free(e);
+	}
+	up_write(&db->entries_rwlock);
+}
+
+/**
+ * kdbus_policy_db_init() - initialize a new policy database
+ * @db:		The location of the database
+ *
+ * This initializes a new policy-db. The underlying memory must have been
+ * cleared to zero by the caller.
+ */
+void kdbus_policy_db_init(struct kdbus_policy_db *db)
+{
+	hash_init(db->entries_hash);
+	init_rwsem(&db->entries_rwlock);
+}
+
+/**
+ * kdbus_policy_query_unlocked() - Query the policy database
+ * @db:		Policy database
+ * @cred:	Credentials to test against
+ * @name:	Name to query
+ * @hash:	Hash value of @name
+ *
+ * Same as kdbus_policy_query() but requires the caller to lock the policy
+ * database against concurrent writes.
+ *
+ * Return: The highest KDBUS_POLICY_* access type found, or -EPERM if none.
+ */
+int kdbus_policy_query_unlocked(struct kdbus_policy_db *db,
+				const struct cred *cred, const char *name,
+				unsigned int hash)
+{
+	struct kdbus_policy_db_entry_access *a;
+	const struct kdbus_policy_db_entry *e;
+	int i, highest = -EPERM;
+
+	e = kdbus_policy_lookup(db, name, hash);
+	if (!e)
+		return -EPERM;
+
+	list_for_each_entry(a, &e->access_list, list) {
+		if ((int)a->access <= highest)
+			continue;
+
+		switch (a->type) {
+		case KDBUS_POLICY_ACCESS_USER:
+			if (uid_eq(cred->euid, a->uid))
+				highest = a->access;
+			break;
+		case KDBUS_POLICY_ACCESS_GROUP:
+			if (gid_eq(cred->egid, a->gid)) {
+				highest = a->access;
+				break;
+			}
+
+			for (i = 0; i < cred->group_info->ngroups; i++) {
+				kgid_t gid = GROUP_AT(cred->group_info, i);
+
+				if (gid_eq(gid, a->gid)) {
+					highest = a->access;
+					break;
+				}
+			}
+
+			break;
+		case KDBUS_POLICY_ACCESS_WORLD:
+			highest = a->access;
+			break;
+		}
+
+		/* OWN is the highest possible policy */
+		if (highest >= KDBUS_POLICY_OWN)
+			break;
+	}
+
+	return highest;
+}
+
+/**
+ * kdbus_policy_query() - Query the policy database
+ * @db:		Policy database
+ * @cred:	Credentials to test against
+ * @name:	Name to query
+ * @hash:	Hash value of @name
+ *
+ * Query the policy database @db for the access rights of @cred to the name
+ * @name. The access rights of @cred are returned, or -EPERM if no access is
+ * granted.
+ *
+ * This call effectively searches for the highest access-right granted to
+ * @cred. The caller should really cache those as policy lookups are rather
+ * expensive.
+ *
+ * Return: The highest KDBUS_POLICY_* access type found, or -EPERM if none.
+ */
+int kdbus_policy_query(struct kdbus_policy_db *db, const struct cred *cred,
+		       const char *name, unsigned int hash)
+{
+	int ret;
+
+	down_read(&db->entries_rwlock);
+	ret = kdbus_policy_query_unlocked(db, cred, name, hash);
+	up_read(&db->entries_rwlock);
+
+	return ret;
+}
+
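+/*
+ * Usage sketch (hypothetical caller and variables): check whether @conn_creds
+ * grant at least TALK access to a well-known name:
+ *
+ *     hash = kdbus_strhash(name);
+ *     ret = kdbus_policy_query(db, conn_creds, name, hash);
+ *     if (ret < KDBUS_POLICY_TALK)
+ *             return -EPERM;
+ */
+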
+static void __kdbus_policy_remove_owner(struct kdbus_policy_db *db,
+					const void *owner)
+{
+	struct kdbus_policy_db_entry *e;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(db->entries_hash, i, tmp, e, hentry)
+		if (e->owner == owner) {
+			hash_del(&e->hentry);
+			kdbus_policy_entry_free(e);
+		}
+}
+
+/**
+ * kdbus_policy_remove_owner() - remove all entries related to a connection
+ * @db:		The policy database
+ * @owner:	The connection which items to remove
+ */
+void kdbus_policy_remove_owner(struct kdbus_policy_db *db,
+			       const void *owner)
+{
+	down_write(&db->entries_rwlock);
+	__kdbus_policy_remove_owner(db, owner);
+	up_write(&db->entries_rwlock);
+}
+
+/*
+ * Convert user provided policy access to internal kdbus policy
+ * access
+ */
+static struct kdbus_policy_db_entry_access *
+kdbus_policy_make_access(const struct kdbus_policy_access *uaccess)
+{
+	int ret;
+	struct kdbus_policy_db_entry_access *a;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return ERR_PTR(-ENOMEM);
+
+	ret = -EINVAL;
+	switch (uaccess->access) {
+	case KDBUS_POLICY_SEE:
+	case KDBUS_POLICY_TALK:
+	case KDBUS_POLICY_OWN:
+		a->access = uaccess->access;
+		break;
+	default:
+		goto err;
+	}
+
+	switch (uaccess->type) {
+	case KDBUS_POLICY_ACCESS_USER:
+		a->uid = make_kuid(current_user_ns(), uaccess->id);
+		if (!uid_valid(a->uid))
+			goto err;
+
+		break;
+	case KDBUS_POLICY_ACCESS_GROUP:
+		a->gid = make_kgid(current_user_ns(), uaccess->id);
+		if (!gid_valid(a->gid))
+			goto err;
+
+		break;
+	case KDBUS_POLICY_ACCESS_WORLD:
+		break;
+	default:
+		goto err;
+	}
+
+	a->type = uaccess->type;
+
+	return a;
+
+err:
+	kfree(a);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_policy_set() - set a connection's policy rules
+ * @db:				The policy database
+ * @items:			A list of kdbus_item elements that contain both
+ *				names and access rules to set.
+ * @items_size:			The total size of the items.
+ * @max_policies:		The maximum number of policy entries to allow.
+ *				Pass 0 for no limit.
+ * @allow_wildcards:		Whether wildcard entries (i.e. names ending
+ *				in '.*') should be allowed.
+ * @owner:			The owner of the new policy items.
+ *
+ * This function sets a new set of policies for a given owner. The names and
+ * access rules are gathered by walking the list of items passed in as
+ * argument. An item of type KDBUS_ITEM_NAME is expected before any number of
+ * KDBUS_ITEM_POLICY_ACCESS items. If there are more repetitions of this
+ * pattern than denoted in @max_policies, -E2BIG is returned.
+ *
+ * In order to allow atomic replacement of rules, the function first removes
+ * all entries that have been created for the given owner previously.
+ *
+ * Callers to this function must make sure that the owner is a custom
+ * endpoint, or if the endpoint is a default endpoint, then it must be
+ * either a policy holder or an activator.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int kdbus_policy_set(struct kdbus_policy_db *db,
+		     const struct kdbus_item *items,
+		     size_t items_size,
+		     size_t max_policies,
+		     bool allow_wildcards,
+		     const void *owner)
+{
+	struct kdbus_policy_db_entry_access *a;
+	struct kdbus_policy_db_entry *e, *p;
+	const struct kdbus_item *item;
+	struct hlist_node *tmp;
+	HLIST_HEAD(entries);
+	HLIST_HEAD(restore);
+	size_t count = 0;
+	int i, ret = 0;
+	u32 hash;
+
+	/* Walk the list of items and look for new policies */
+	e = NULL;
+	KDBUS_ITEMS_FOREACH(item, items, items_size) {
+		switch (item->type) {
+		case KDBUS_ITEM_NAME: {
+			size_t len;
+
+			if (max_policies && ++count > max_policies) {
+				ret = -E2BIG;
+				goto exit;
+			}
+
+			if (!kdbus_name_is_valid(item->str, true)) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			e = kzalloc(sizeof(*e), GFP_KERNEL);
+			if (!e) {
+				ret = -ENOMEM;
+				goto exit;
+			}
+
+			INIT_LIST_HEAD(&e->access_list);
+			e->owner = owner;
+			hlist_add_head(&e->hentry, &entries);
+
+			e->name = kstrdup(item->str, GFP_KERNEL);
+			if (!e->name) {
+				ret = -ENOMEM;
+				goto exit;
+			}
+
+			/*
+			 * If a supplied name ends in '.*', cut off that part,
+			 * store only what comes before it, and mark the entry
+			 * as a wildcard.
+			 */
+			len = strlen(e->name);
+			if (len > 2 &&
+			    e->name[len - 3] == '.' &&
+			    e->name[len - 2] == '*') {
+				if (!allow_wildcards) {
+					ret = -EINVAL;
+					goto exit;
+				}
+
+				e->name[len - 3] = '\0';
+				e->wildcard = true;
+			}
+
+			break;
+		}
+
+		case KDBUS_ITEM_POLICY_ACCESS:
+			if (!e) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			a = kdbus_policy_make_access(&item->policy_access);
+			if (IS_ERR(a)) {
+				ret = PTR_ERR(a);
+				goto exit;
+			}
+
+			list_add_tail(&a->list, &e->access_list);
+			break;
+		}
+	}
+
+	down_write(&db->entries_rwlock);
+
+	/* remember previous entries to restore in case of failure */
+	hash_for_each_safe(db->entries_hash, i, tmp, e, hentry)
+		if (e->owner == owner) {
+			hash_del(&e->hentry);
+			hlist_add_head(&e->hentry, &restore);
+		}
+
+	hlist_for_each_entry_safe(e, tmp, &entries, hentry) {
+		/* prevent duplicates */
+		hash = kdbus_strhash(e->name);
+		hash_for_each_possible(db->entries_hash, p, hentry, hash)
+			if (strcmp(e->name, p->name) == 0 &&
+			    e->wildcard == p->wildcard) {
+				ret = -EEXIST;
+				goto restore;
+			}
+
+		hlist_del(&e->hentry);
+		hash_add(db->entries_hash, &e->hentry, hash);
+	}
+
+restore:
+	/* if we failed, flush all entries we added so far */
+	if (ret < 0)
+		__kdbus_policy_remove_owner(db, owner);
+
+	/* if we failed, restore entries, otherwise release them */
+	hlist_for_each_entry_safe(e, tmp, &restore, hentry) {
+		hlist_del(&e->hentry);
+		if (ret < 0) {
+			hash = kdbus_strhash(e->name);
+			hash_add(db->entries_hash, &e->hentry, hash);
+		} else {
+			kdbus_policy_entry_free(e);
+		}
+	}
+
+	up_write(&db->entries_rwlock);
+
+exit:
+	hlist_for_each_entry_safe(e, tmp, &entries, hentry) {
+		hlist_del(&e->hentry);
+		kdbus_policy_entry_free(e);
+	}
+
+	return ret;
+}
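+
+/*
+ * Illustrative item sequence (hypothetical names): each KDBUS_ITEM_NAME opens
+ * a new policy entry and is followed by its KDBUS_ITEM_POLICY_ACCESS items:
+ *
+ *     KDBUS_ITEM_NAME           "org.example.Service"
+ *     KDBUS_ITEM_POLICY_ACCESS  USER, uid 1000, OWN
+ *     KDBUS_ITEM_POLICY_ACCESS  WORLD, TALK
+ *     KDBUS_ITEM_NAME           "org.example.*"
+ *     KDBUS_ITEM_POLICY_ACCESS  WORLD, SEE
+ */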
diff -Nur linux-4.2/ipc/kdbus/policy.h linux-4.2-pck/ipc/kdbus/policy.h
--- linux-4.2/ipc/kdbus/policy.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/policy.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_POLICY_H
+#define __KDBUS_POLICY_H
+
+#include <linux/hashtable.h>
+#include <linux/rwsem.h>
+
+struct kdbus_conn;
+struct kdbus_item;
+
+/**
+ * struct kdbus_policy_db - policy database
+ * @entries_hash:	Hashtable of entries
+ * @entries_rwlock:	Mutex to protect the database's access entries
+ */
+struct kdbus_policy_db {
+	DECLARE_HASHTABLE(entries_hash, 6);
+	struct rw_semaphore entries_rwlock;
+};
+
+void kdbus_policy_db_init(struct kdbus_policy_db *db);
+void kdbus_policy_db_clear(struct kdbus_policy_db *db);
+
+int kdbus_policy_query_unlocked(struct kdbus_policy_db *db,
+				const struct cred *cred, const char *name,
+				unsigned int hash);
+int kdbus_policy_query(struct kdbus_policy_db *db, const struct cred *cred,
+		       const char *name, unsigned int hash);
+
+void kdbus_policy_remove_owner(struct kdbus_policy_db *db,
+			       const void *owner);
+int kdbus_policy_set(struct kdbus_policy_db *db,
+		     const struct kdbus_item *items,
+		     size_t items_size,
+		     size_t max_policies,
+		     bool allow_wildcards,
+		     const void *owner);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/pool.c linux-4.2-pck/ipc/kdbus/pool.c
--- linux-4.2/ipc/kdbus/pool.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/pool.c	2015-09-08 00:55:40.120721602 -0300
@@ -0,0 +1,728 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/aio.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/shmem_fs.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+
+#include "pool.h"
+#include "util.h"
+
+/**
+ * struct kdbus_pool - the receiver's buffer
+ * @f:			The backing shmem file
+ * @size:		The size of the file
+ * @accounted_size:	Currently accounted memory in bytes
+ * @lock:		Pool data lock
+ * @slices:		All slices sorted by address
+ * @slices_busy:	Tree of allocated slices
+ * @slices_free:	Tree of free slices
+ *
+ * The receiver's buffer, managed as a pool of allocated and free
+ * slices containing the queued messages.
+ *
+ * Messages sent with KDBUS_CMD_SEND are copied directly by the
+ * sending process into the receiver's pool.
+ *
+ * Messages received with KDBUS_CMD_RECV just return the offset
+ * to the data placed in the pool.
+ *
+ * The internally allocated memory needs to be returned by the receiver
+ * with KDBUS_CMD_FREE.
+ */
+struct kdbus_pool {
+	struct file *f;
+	size_t size;
+	size_t accounted_size;
+	struct mutex lock;
+
+	struct list_head slices;
+	struct rb_root slices_busy;
+	struct rb_root slices_free;
+};
+
+/**
+ * struct kdbus_pool_slice - allocated element in kdbus_pool
+ * @pool:		Pool this slice belongs to
+ * @off:		Offset of slice in the shmem file
+ * @size:		Size of slice
+ * @entry:		Entry in "all slices" list
+ * @rb_node:		Entry in free or busy list
+ * @free:		Unused slice
+ * @accounted:		Accounted as queue slice
+ * @ref_kernel:		Kernel holds a reference
+ * @ref_user:		Userspace holds a reference
+ *
+ * The pool has one or more slices, always spanning the entire size of the
+ * pool.
+ *
+ * Every slice is an element in a list sorted by the buffer address, to
+ * provide access to the next neighbor slice.
+ *
+ * Every slice is member in either the busy or the free tree. The free
+ * tree is organized by slice size, the busy tree organized by buffer
+ * offset.
+ */
+struct kdbus_pool_slice {
+	struct kdbus_pool *pool;
+	size_t off;
+	size_t size;
+
+	struct list_head entry;
+	struct rb_node rb_node;
+
+	bool free:1;
+	bool accounted:1;
+	bool ref_kernel:1;
+	bool ref_user:1;
+};
+
+static struct kdbus_pool_slice *kdbus_pool_slice_new(struct kdbus_pool *pool,
+						     size_t off, size_t size)
+{
+	struct kdbus_pool_slice *slice;
+
+	slice = kzalloc(sizeof(*slice), GFP_KERNEL);
+	if (!slice)
+		return NULL;
+
+	slice->pool = pool;
+	slice->off = off;
+	slice->size = size;
+	slice->free = true;
+	return slice;
+}
+
+/* insert a slice into the free tree */
+static void kdbus_pool_add_free_slice(struct kdbus_pool *pool,
+				      struct kdbus_pool_slice *slice)
+{
+	struct rb_node **n;
+	struct rb_node *pn = NULL;
+
+	n = &pool->slices_free.rb_node;
+	while (*n) {
+		struct kdbus_pool_slice *pslice;
+
+		pn = *n;
+		pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
+		if (slice->size < pslice->size)
+			n = &pn->rb_left;
+		else
+			n = &pn->rb_right;
+	}
+
+	rb_link_node(&slice->rb_node, pn, n);
+	rb_insert_color(&slice->rb_node, &pool->slices_free);
+}
+
+/* insert a slice into the busy tree */
+static void kdbus_pool_add_busy_slice(struct kdbus_pool *pool,
+				      struct kdbus_pool_slice *slice)
+{
+	struct rb_node **n;
+	struct rb_node *pn = NULL;
+
+	n = &pool->slices_busy.rb_node;
+	while (*n) {
+		struct kdbus_pool_slice *pslice;
+
+		pn = *n;
+		pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
+		if (slice->off < pslice->off)
+			n = &pn->rb_left;
+		else if (slice->off > pslice->off)
+			n = &pn->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&slice->rb_node, pn, n);
+	rb_insert_color(&slice->rb_node, &pool->slices_busy);
+}
+
+static struct kdbus_pool_slice *kdbus_pool_find_slice(struct kdbus_pool *pool,
+						      size_t off)
+{
+	struct rb_node *n;
+
+	n = pool->slices_busy.rb_node;
+	while (n) {
+		struct kdbus_pool_slice *s;
+
+		s = rb_entry(n, struct kdbus_pool_slice, rb_node);
+		if (off < s->off)
+			n = n->rb_left;
+		else if (off > s->off)
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+/**
+ * kdbus_pool_slice_alloc() - allocate memory from a pool
+ * @pool:	The receiver's pool
+ * @size:	The number of bytes to allocate
+ * @accounted:	Whether this slice should be accounted for
+ *
+ * This allocates a slice of @size bytes (rounded up to a multiple of 8) from
+ * the pool. The returned slice must later be freed again via
+ * kdbus_pool_slice_release().
+ *
+ * Return: the allocated slice on success, ERR_PTR on failure.
+ */
+struct kdbus_pool_slice *kdbus_pool_slice_alloc(struct kdbus_pool *pool,
+						size_t size, bool accounted)
+{
+	size_t slice_size = KDBUS_ALIGN8(size);
+	struct rb_node *n, *found = NULL;
+	struct kdbus_pool_slice *s;
+	int ret = 0;
+
+	if (WARN_ON(!size))
+		return ERR_PTR(-EINVAL);
+
+	/* search a free slice with the closest matching size */
+	mutex_lock(&pool->lock);
+	n = pool->slices_free.rb_node;
+	while (n) {
+		s = rb_entry(n, struct kdbus_pool_slice, rb_node);
+		if (slice_size < s->size) {
+			found = n;
+			n = n->rb_left;
+		} else if (slice_size > s->size) {
+			n = n->rb_right;
+		} else {
+			found = n;
+			break;
+		}
+	}
+
+	/* no slice with the minimum size found in the pool */
+	if (!found) {
+		ret = -EXFULL;
+		goto exit_unlock;
+	}
+
+	/* no exact match, use the closest one */
+	if (!n) {
+		struct kdbus_pool_slice *s_new;
+
+		s = rb_entry(found, struct kdbus_pool_slice, rb_node);
+
+		/* split-off the remainder of the size to its own slice */
+		s_new = kdbus_pool_slice_new(pool, s->off + slice_size,
+					     s->size - slice_size);
+		if (!s_new) {
+			ret = -ENOMEM;
+			goto exit_unlock;
+		}
+
+		list_add(&s_new->entry, &s->entry);
+		kdbus_pool_add_free_slice(pool, s_new);
+
+		/* adjust our size now that we split-off another slice */
+		s->size = slice_size;
+	}
+
+	/* move slice from free to the busy tree */
+	rb_erase(found, &pool->slices_free);
+	kdbus_pool_add_busy_slice(pool, s);
+
+	WARN_ON(s->ref_kernel || s->ref_user);
+
+	s->ref_kernel = true;
+	s->free = false;
+	s->accounted = accounted;
+	if (accounted)
+		pool->accounted_size += s->size;
+	mutex_unlock(&pool->lock);
+
+	return s;
+
+exit_unlock:
+	mutex_unlock(&pool->lock);
+	return ERR_PTR(ret);
+}
+
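+/*
+ * Lifecycle sketch (illustrative only; @offset and @msg_size are hypothetical
+ * output variables): a slice is allocated on behalf of the receiver, filled
+ * with message data, published to user-space and then the kernel reference is
+ * dropped; user-space later frees it via KDBUS_CMD_FREE:
+ *
+ *     slice = kdbus_pool_slice_alloc(pool, size, true);
+ *     if (IS_ERR(slice))
+ *             return PTR_ERR(slice);
+ *     ... copy message data into the slice ...
+ *     kdbus_pool_slice_publish(slice, &offset, &msg_size);
+ *     kdbus_pool_slice_release(slice);
+ */
+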
+static void __kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
+{
+	struct kdbus_pool *pool = slice->pool;
+
+	/* don't free the slice if kernel or user-space still holds a reference */
+	if (slice->ref_kernel || slice->ref_user)
+		return;
+
+	if (WARN_ON(slice->free))
+		return;
+
+	rb_erase(&slice->rb_node, &pool->slices_busy);
+
+	/* merge with the next free slice */
+	if (!list_is_last(&slice->entry, &pool->slices)) {
+		struct kdbus_pool_slice *s;
+
+		s = list_entry(slice->entry.next,
+			       struct kdbus_pool_slice, entry);
+		if (s->free) {
+			rb_erase(&s->rb_node, &pool->slices_free);
+			list_del(&s->entry);
+			slice->size += s->size;
+			kfree(s);
+		}
+	}
+
+	/* merge with previous free slice */
+	if (pool->slices.next != &slice->entry) {
+		struct kdbus_pool_slice *s;
+
+		s = list_entry(slice->entry.prev,
+			       struct kdbus_pool_slice, entry);
+		if (s->free) {
+			rb_erase(&s->rb_node, &pool->slices_free);
+			list_del(&slice->entry);
+			s->size += slice->size;
+			kfree(slice);
+			slice = s;
+		}
+	}
+
+	slice->free = true;
+	kdbus_pool_add_free_slice(pool, slice);
+}
+
+/**
+ * kdbus_pool_slice_release() - drop kernel-reference on allocated slice
+ * @slice:		Slice allocated from the pool
+ *
+ * This releases the kernel-reference on the given slice. If the
+ * kernel-reference and the user-reference on a slice are dropped, the slice is
+ * returned to the pool.
+ *
+ * So far, we do not implement full ref-counting on slices. The kernel and
+ * user-space can each hold exactly one reference to a slice. Once both are
+ * dropped, the slice is released.
+ */
+void kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
+{
+	struct kdbus_pool *pool;
+
+	if (!slice)
+		return;
+
+	/* @slice may be freed, so keep local ptr to @pool */
+	pool = slice->pool;
+
+	mutex_lock(&pool->lock);
+	/* kernel must own a ref to @slice to drop it */
+	WARN_ON(!slice->ref_kernel);
+	slice->ref_kernel = false;
+	/* no longer kernel-owned, de-account slice */
+	if (slice->accounted && !WARN_ON(pool->accounted_size < slice->size))
+		pool->accounted_size -= slice->size;
+	__kdbus_pool_slice_release(slice);
+	mutex_unlock(&pool->lock);
+}
+
+/**
+ * kdbus_pool_release_offset() - release a public offset
+ * @pool:		pool to operate on
+ * @off:		offset to release
+ *
+ * This should be called whenever user-space frees a slice given to them. It
+ * verifies that the slice is available and public, and then drops the
+ * user-space reference. It ensures correct locking and barriers against
+ * queues.
+ *
+ * Return: 0 on success, ENXIO if the offset is invalid or not public.
+ */
+int kdbus_pool_release_offset(struct kdbus_pool *pool, size_t off)
+{
+	struct kdbus_pool_slice *slice;
+	int ret = 0;
+
+	/* 'pool->size' is used as dummy offset for empty slices */
+	if (off == pool->size)
+		return 0;
+
+	mutex_lock(&pool->lock);
+	slice = kdbus_pool_find_slice(pool, off);
+	if (slice && slice->ref_user) {
+		slice->ref_user = false;
+		__kdbus_pool_slice_release(slice);
+	} else {
+		ret = -ENXIO;
+	}
+	mutex_unlock(&pool->lock);
+
+	return ret;
+}
+
+/**
+ * kdbus_pool_publish_empty() - publish empty slice to user-space
+ * @pool:		pool to operate on
+ * @off:		output storage for offset, or NULL
+ * @size:		output storage for size, or NULL
+ *
+ * This is the same as kdbus_pool_slice_publish(), but uses a dummy slice with
+ * size 0. The returned offset points to the end of the pool and is never
+ * returned on real slices.
+ */
+void kdbus_pool_publish_empty(struct kdbus_pool *pool, u64 *off, u64 *size)
+{
+	if (off)
+		*off = pool->size;
+	if (size)
+		*size = 0;
+}
+
+/**
+ * kdbus_pool_slice_publish() - publish slice to user-space
+ * @slice:		The slice
+ * @out_offset:		Output storage for offset, or NULL
+ * @out_size:		Output storage for size, or NULL
+ *
+ * This prepares a slice to be published to user-space.
+ *
+ * This call combines the following operations:
+ *   * the memory region is flushed so the user's memory view is consistent
+ *   * the slice is marked as referenced by user-space, so user-space has to
+ *     call KDBUS_CMD_FREE to release it
+ *   * the offset and size of the slice are written to the given output
+ *     arguments, if non-NULL
+ */
+void kdbus_pool_slice_publish(struct kdbus_pool_slice *slice,
+			      u64 *out_offset, u64 *out_size)
+{
+	mutex_lock(&slice->pool->lock);
+	/* kernel must own a ref to @slice to gain a user-space ref */
+	WARN_ON(!slice->ref_kernel);
+	slice->ref_user = true;
+	mutex_unlock(&slice->pool->lock);
+
+	if (out_offset)
+		*out_offset = slice->off;
+	if (out_size)
+		*out_size = slice->size;
+}
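+
+/*
+ * Example: typical kernel-side life cycle of a slice. A message is copied
+ * into a freshly allocated slice, the offset/size pair is published to
+ * user-space (which must later free it via KDBUS_CMD_FREE), and the kernel
+ * reference is dropped. This is only a simplified sketch; quota accounting
+ * and the error paths of the real callers are omitted, and
+ * example_fill_slice() is a hypothetical helper.
+ *
+ *	struct kdbus_pool_slice *slice;
+ *	u64 off, size;
+ *
+ *	slice = kdbus_pool_slice_alloc(pool, msg_size, true);
+ *	if (IS_ERR(slice))
+ *		return PTR_ERR(slice);
+ *
+ *	example_fill_slice(slice);
+ *	kdbus_pool_slice_publish(slice, &off, &size);
+ *	kdbus_pool_slice_release(slice);
+ */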
+
+/**
+ * kdbus_pool_slice_offset() - Get a slice's offset inside the pool
+ * @slice:	Slice to return the offset of
+ *
+ * Return: The internal offset of @slice inside the pool.
+ */
+off_t kdbus_pool_slice_offset(const struct kdbus_pool_slice *slice)
+{
+	return slice->off;
+}
+
+/**
+ * kdbus_pool_slice_size() - get size of a pool slice
+ * @slice:	slice to query
+ *
+ * Return: size of the given slice
+ */
+size_t kdbus_pool_slice_size(const struct kdbus_pool_slice *slice)
+{
+	return slice->size;
+}
+
+/**
+ * kdbus_pool_new() - create a new pool
+ * @name:		Name of the (deleted) file which shows up in
+ *			/proc, used for debugging
+ * @size:		Maximum size of the pool
+ *
+ * Return: a new kdbus_pool on success, ERR_PTR on failure.
+ */
+struct kdbus_pool *kdbus_pool_new(const char *name, size_t size)
+{
+	struct kdbus_pool_slice *s;
+	struct kdbus_pool *p;
+	struct file *f;
+	char *n = NULL;
+	int ret;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	if (name) {
+		n = kasprintf(GFP_KERNEL, KBUILD_MODNAME "-conn:%s", name);
+		if (!n) {
+			ret = -ENOMEM;
+			goto exit_free;
+		}
+	}
+
+	f = shmem_file_setup(n ?: KBUILD_MODNAME "-conn", size, 0, 0);
+	kfree(n);
+
+	if (IS_ERR(f)) {
+		ret = PTR_ERR(f);
+		goto exit_free;
+	}
+
+	ret = get_write_access(file_inode(f));
+	if (ret < 0)
+		goto exit_put_shmem;
+
+	/* allocate first slice spanning the entire pool */
+	s = kdbus_pool_slice_new(p, 0, size);
+	if (!s) {
+		ret = -ENOMEM;
+		goto exit_put_write;
+	}
+
+	p->f = f;
+	p->size = size;
+	p->slices_free = RB_ROOT;
+	p->slices_busy = RB_ROOT;
+	mutex_init(&p->lock);
+
+	INIT_LIST_HEAD(&p->slices);
+	list_add(&s->entry, &p->slices);
+
+	kdbus_pool_add_free_slice(p, s);
+	return p;
+
+exit_put_write:
+	put_write_access(file_inode(f));
+exit_put_shmem:
+	fput(f);
+exit_free:
+	kfree(p);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_pool_free() - destroy pool
+ * @pool:		The receiver's pool
+ */
+void kdbus_pool_free(struct kdbus_pool *pool)
+{
+	struct kdbus_pool_slice *s, *tmp;
+
+	if (!pool)
+		return;
+
+	list_for_each_entry_safe(s, tmp, &pool->slices, entry) {
+		list_del(&s->entry);
+		kfree(s);
+	}
+
+	put_write_access(file_inode(pool->f));
+	fput(pool->f);
+	kfree(pool);
+}
+
+/**
+ * kdbus_pool_accounted() - retrieve accounting information
+ * @pool:		pool to query
+ * @size:		output for overall pool size
+ * @acc:		output for currently accounted size
+ *
+ * This returns accounting information of the pool. Note that the data might
+ * change after the function returns, as the pool lock is dropped. You need to
+ * protect the data via other means, if you need reliable accounting.
+ */
+void kdbus_pool_accounted(struct kdbus_pool *pool, size_t *size, size_t *acc)
+{
+	mutex_lock(&pool->lock);
+	if (size)
+		*size = pool->size;
+	if (acc)
+		*acc = pool->accounted_size;
+	mutex_unlock(&pool->lock);
+}
+
+/**
+ * kdbus_pool_slice_copy_iovec() - copy user memory to a slice
+ * @slice:		The slice to write to
+ * @off:		Offset in the slice to write to
+ * @iov:		iovec array, pointing to data to copy
+ * @iov_len:		Number of elements in @iov
+ * @total_len:		Total number of bytes described in members of @iov
+ *
+ * User memory referenced by @iov will be copied into @slice at offset @off.
+ *
+ * Return: the number of bytes copied, negative errno on failure.
+ */
+ssize_t
+kdbus_pool_slice_copy_iovec(const struct kdbus_pool_slice *slice, loff_t off,
+			    struct iovec *iov, size_t iov_len, size_t total_len)
+{
+	struct iov_iter iter;
+	ssize_t len;
+
+	if (WARN_ON(off + total_len > slice->size))
+		return -EFAULT;
+
+	off += slice->off;
+	iov_iter_init(&iter, WRITE, iov, iov_len, total_len);
+	len = vfs_iter_write(slice->pool->f, &iter, &off);
+
+	return (len >= 0 && len != total_len) ? -EFAULT : len;
+}
+
+/**
+ * kdbus_pool_slice_copy_kvec() - copy kernel memory to a slice
+ * @slice:		The slice to write to
+ * @off:		Offset in the slice to write to
+ * @kvec:		kvec array, pointing to data to copy
+ * @kvec_len:		Number of elements in @kvec
+ * @total_len:		Total number of bytes described in members of @kvec
+ *
+ * Kernel memory referenced by @kvec will be copied into @slice at offset @off.
+ *
+ * Return: the number of bytes copied, negative errno on failure.
+ */
+ssize_t kdbus_pool_slice_copy_kvec(const struct kdbus_pool_slice *slice,
+				   loff_t off, struct kvec *kvec,
+				   size_t kvec_len, size_t total_len)
+{
+	struct iov_iter iter;
+	mm_segment_t old_fs;
+	ssize_t len;
+
+	if (WARN_ON(off + total_len > slice->size))
+		return -EFAULT;
+
+	off += slice->off;
+	iov_iter_kvec(&iter, WRITE | ITER_KVEC, kvec, kvec_len, total_len);
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	len = vfs_iter_write(slice->pool->f, &iter, &off);
+	set_fs(old_fs);
+
+	return (len >= 0 && len != total_len) ? -EFAULT : len;
+}
+
+/**
+ * kdbus_pool_slice_copy() - copy data from one slice into another
+ * @slice_dst:		destination slice
+ * @slice_src:		source slice
+ *
+ * Return: 0 on success, negative error number on failure.
+ */
+int kdbus_pool_slice_copy(const struct kdbus_pool_slice *slice_dst,
+			  const struct kdbus_pool_slice *slice_src)
+{
+	struct file *f_src = slice_src->pool->f;
+	struct file *f_dst = slice_dst->pool->f;
+	struct inode *i_dst = file_inode(f_dst);
+	struct address_space *mapping_dst = f_dst->f_mapping;
+	const struct address_space_operations *aops = mapping_dst->a_ops;
+	unsigned long len = slice_src->size;
+	loff_t off_src = slice_src->off;
+	loff_t off_dst = slice_dst->off;
+	mm_segment_t old_fs;
+	int ret = 0;
+
+	if (WARN_ON(slice_src->size != slice_dst->size) ||
+	    WARN_ON(slice_src->free || slice_dst->free))
+		return -EINVAL;
+
+	mutex_lock(&i_dst->i_mutex);
+	old_fs = get_fs();
+	set_fs(get_ds());
+	while (len > 0) {
+		unsigned long page_off;
+		unsigned long copy_len;
+		char __user *kaddr;
+		struct page *page;
+		ssize_t n_read;
+		void *fsdata;
+		long status;
+
+		page_off = off_dst & (PAGE_CACHE_SIZE - 1);
+		copy_len = min_t(unsigned long,
+				 PAGE_CACHE_SIZE - page_off, len);
+
+		status = aops->write_begin(f_dst, mapping_dst, off_dst,
+					   copy_len, 0, &page, &fsdata);
+		if (unlikely(status < 0)) {
+			ret = status;
+			break;
+		}
+
+		kaddr = (char __force __user *)kmap(page) + page_off;
+		n_read = __vfs_read(f_src, kaddr, copy_len, &off_src);
+		kunmap(page);
+		mark_page_accessed(page);
+		flush_dcache_page(page);
+
+		if (unlikely(n_read != copy_len)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		status = aops->write_end(f_dst, mapping_dst, off_dst,
+					 copy_len, copy_len, page, fsdata);
+		if (unlikely(status != copy_len)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		off_dst += copy_len;
+		len -= copy_len;
+	}
+	set_fs(old_fs);
+	mutex_unlock(&i_dst->i_mutex);
+
+	return ret;
+}
+
+/**
+ * kdbus_pool_mmap() -  map the pool into the process
+ * @pool:		The receiver's pool
+ * @vma:		passed by mmap() syscall
+ *
+ * Return: the result of the mmap() call, negative errno on failure.
+ */
+int kdbus_pool_mmap(const struct kdbus_pool *pool, struct vm_area_struct *vma)
+{
+	/* deny write access to the pool */
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	/* do not allow mapping more than the size of the file */
+	if ((vma->vm_end - vma->vm_start) > pool->size)
+		return -EFAULT;
+
+	/* replace the connection file with our shmem file */
+	if (vma->vm_file)
+		fput(vma->vm_file);
+	vma->vm_file = get_file(pool->f);
+
+	return pool->f->f_op->mmap(pool->f, vma);
+}
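+
+/*
+ * Example: how a receiver consumes the pool from user-space. The connection
+ * file descriptor is mapped read-only, and offsets returned by KDBUS_CMD_RECV
+ * are interpreted relative to that mapping; once the message has been parsed,
+ * the slice is returned with KDBUS_CMD_FREE. This is only an illustrative
+ * sketch; error handling is omitted and conn_fd/pool_size are assumed to come
+ * from the connection setup (see uapi/linux/kdbus.h for the exact command
+ * structures).
+ *
+ *	uint8_t *pool;
+ *	struct kdbus_msg *msg;
+ *	struct kdbus_cmd_free cmd_free = { .size = sizeof(cmd_free) };
+ *
+ *	pool = mmap(NULL, pool_size, PROT_READ, MAP_SHARED, conn_fd, 0);
+ *	msg = (struct kdbus_msg *)(pool + recv.msg.offset);
+ *	... parse the message items ...
+ *	cmd_free.offset = recv.msg.offset;
+ *	ioctl(conn_fd, KDBUS_CMD_FREE, &cmd_free);
+ */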
diff -Nur linux-4.2/ipc/kdbus/pool.h linux-4.2-pck/ipc/kdbus/pool.h
--- linux-4.2/ipc/kdbus/pool.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/pool.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_POOL_H
+#define __KDBUS_POOL_H
+
+#include <linux/uio.h>
+
+struct kdbus_pool;
+struct kdbus_pool_slice;
+
+struct kdbus_pool *kdbus_pool_new(const char *name, size_t size);
+void kdbus_pool_free(struct kdbus_pool *pool);
+void kdbus_pool_accounted(struct kdbus_pool *pool, size_t *size, size_t *acc);
+int kdbus_pool_mmap(const struct kdbus_pool *pool, struct vm_area_struct *vma);
+int kdbus_pool_release_offset(struct kdbus_pool *pool, size_t off);
+void kdbus_pool_publish_empty(struct kdbus_pool *pool, u64 *off, u64 *size);
+
+struct kdbus_pool_slice *kdbus_pool_slice_alloc(struct kdbus_pool *pool,
+						size_t size, bool accounted);
+void kdbus_pool_slice_release(struct kdbus_pool_slice *slice);
+void kdbus_pool_slice_publish(struct kdbus_pool_slice *slice,
+			      u64 *out_offset, u64 *out_size);
+off_t kdbus_pool_slice_offset(const struct kdbus_pool_slice *slice);
+size_t kdbus_pool_slice_size(const struct kdbus_pool_slice *slice);
+int kdbus_pool_slice_copy(const struct kdbus_pool_slice *slice_dst,
+			  const struct kdbus_pool_slice *slice_src);
+ssize_t kdbus_pool_slice_copy_kvec(const struct kdbus_pool_slice *slice,
+				   loff_t off, struct kvec *kvec,
+				   size_t kvec_count, size_t total_len);
+ssize_t kdbus_pool_slice_copy_iovec(const struct kdbus_pool_slice *slice,
+				    loff_t off, struct iovec *iov,
+				    size_t iov_count, size_t total_len);
+
+#endif
diff -Nur linux-4.2/ipc/kdbus/queue.c linux-4.2-pck/ipc/kdbus/queue.c
--- linux-4.2/ipc/kdbus/queue.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/queue.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/audit.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uio.h>
+
+#include "util.h"
+#include "domain.h"
+#include "connection.h"
+#include "item.h"
+#include "message.h"
+#include "metadata.h"
+#include "queue.h"
+#include "reply.h"
+
+/**
+ * kdbus_queue_init() - initialize data structure related to a queue
+ * @queue:	The queue to initialize
+ */
+void kdbus_queue_init(struct kdbus_queue *queue)
+{
+	INIT_LIST_HEAD(&queue->msg_list);
+	queue->msg_prio_queue = RB_ROOT;
+}
+
+/**
+ * kdbus_queue_peek() - Retrieves an entry from a queue
+ * @queue:		The queue
+ * @priority:		The minimum priority of the entry to peek
+ * @use_priority:	Boolean flag whether or not to peek by priority
+ *
+ * Look for an entry in a queue, either by priority or the oldest one (FIFO).
+ * The entry is neither freed nor removed from the queue's lists.
+ *
+ * Return: the peeked queue entry on success, NULL if no suitable msg is found
+ */
+struct kdbus_queue_entry *kdbus_queue_peek(struct kdbus_queue *queue,
+					   s64 priority, bool use_priority)
+{
+	struct kdbus_queue_entry *e;
+
+	if (list_empty(&queue->msg_list))
+		return NULL;
+
+	if (use_priority) {
+		/* get next entry with highest priority */
+		e = rb_entry(queue->msg_prio_highest,
+			     struct kdbus_queue_entry, prio_node);
+
+		/* no entry with the requested priority */
+		if (e->priority > priority)
+			return NULL;
+	} else {
+		/* ignore the priority, return the next entry in the list */
+		e = list_first_entry(&queue->msg_list,
+				     struct kdbus_queue_entry, entry);
+	}
+
+	return e;
+}
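+
+/*
+ * Example: the two peek modes. With @use_priority false, the oldest entry is
+ * returned regardless of priority; with @use_priority true, an entry is only
+ * returned if its priority value is smaller than or equal to the requested
+ * one. Sketch only; callers must hold the connection lock.
+ *
+ *	e = kdbus_queue_peek(&conn->queue, 0, false);	oldest entry (FIFO)
+ *	e = kdbus_queue_peek(&conn->queue, 16, true);	only if priority <= 16
+ */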
+
+static void kdbus_queue_entry_link(struct kdbus_queue_entry *entry)
+{
+	struct kdbus_queue *queue = &entry->conn->queue;
+	struct rb_node **n, *pn = NULL;
+	bool highest = true;
+
+	lockdep_assert_held(&entry->conn->lock);
+	if (WARN_ON(!list_empty(&entry->entry)))
+		return;
+
+	/* sort into priority entry tree */
+	n = &queue->msg_prio_queue.rb_node;
+	while (*n) {
+		struct kdbus_queue_entry *e;
+
+		pn = *n;
+		e = rb_entry(pn, struct kdbus_queue_entry, prio_node);
+
+		/* existing node for this priority, add to its list */
+		if (likely(entry->priority == e->priority)) {
+			list_add_tail(&entry->prio_entry, &e->prio_entry);
+			goto prio_done;
+		}
+
+		if (entry->priority < e->priority) {
+			n = &pn->rb_left;
+		} else {
+			n = &pn->rb_right;
+			highest = false;
+		}
+	}
+
+	/* cache highest-priority entry */
+	if (highest)
+		queue->msg_prio_highest = &entry->prio_node;
+
+	/* new node for this priority */
+	rb_link_node(&entry->prio_node, pn, n);
+	rb_insert_color(&entry->prio_node, &queue->msg_prio_queue);
+	INIT_LIST_HEAD(&entry->prio_entry);
+
+prio_done:
+	/* add to unsorted fifo list */
+	list_add_tail(&entry->entry, &queue->msg_list);
+}
+
+static void kdbus_queue_entry_unlink(struct kdbus_queue_entry *entry)
+{
+	struct kdbus_queue *queue = &entry->conn->queue;
+
+	lockdep_assert_held(&entry->conn->lock);
+	if (list_empty(&entry->entry))
+		return;
+
+	list_del_init(&entry->entry);
+
+	if (list_empty(&entry->prio_entry)) {
+		/*
+		 * Single entry for this priority, update cached
+		 * highest-priority entry, remove the tree node.
+		 */
+		if (queue->msg_prio_highest == &entry->prio_node)
+			queue->msg_prio_highest = rb_next(&entry->prio_node);
+
+		rb_erase(&entry->prio_node, &queue->msg_prio_queue);
+	} else {
+		struct kdbus_queue_entry *q;
+
+		/*
+		 * Multiple entries for this priority; get the next one in
+		 * the list, update the cached highest-priority entry and
+		 * make the next entry the new tree node.
+		 */
+		q = list_first_entry(&entry->prio_entry,
+				     struct kdbus_queue_entry, prio_entry);
+		list_del(&entry->prio_entry);
+
+		if (queue->msg_prio_highest == &entry->prio_node)
+			queue->msg_prio_highest = &q->prio_node;
+
+		rb_replace_node(&entry->prio_node, &q->prio_node,
+				&queue->msg_prio_queue);
+	}
+}
+
+/**
+ * kdbus_queue_entry_new() - allocate a queue entry
+ * @src:	source connection, or NULL
+ * @dst:	destination connection
+ * @s:		staging object carrying the message
+ *
+ * Allocate a queue entry based on the given staging object and allocate
+ * space for the message payload and the requested metadata in the
+ * connection's pool. The entry is not actually added to the queue's lists
+ * at this point.
+ *
+ * Return: the allocated entry on success, or an ERR_PTR on failure.
+ */
+struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *src,
+						struct kdbus_conn *dst,
+						struct kdbus_staging *s)
+{
+	struct kdbus_queue_entry *entry;
+	int ret;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&entry->entry);
+	entry->priority = s->msg->priority;
+	entry->conn = kdbus_conn_ref(dst);
+	entry->gaps = kdbus_gaps_ref(s->gaps);
+
+	entry->slice = kdbus_staging_emit(s, src, dst);
+	if (IS_ERR(entry->slice)) {
+		ret = PTR_ERR(entry->slice);
+		entry->slice = NULL;
+		goto error;
+	}
+
+	entry->user = src ? kdbus_user_ref(src->user) : NULL;
+	return entry;
+
+error:
+	kdbus_queue_entry_free(entry);
+	return ERR_PTR(ret);
+}
+
+/**
+ * kdbus_queue_entry_free() - free resources of an entry
+ * @entry:	The entry to free
+ *
+ * Releases the resources held by a queue entry, along with the entry itself.
+ * Note that the entry's slice is only released, not freed, at this point;
+ * user-space may still hold a reference to it.
+ */
+void kdbus_queue_entry_free(struct kdbus_queue_entry *entry)
+{
+	if (!entry)
+		return;
+
+	lockdep_assert_held(&entry->conn->lock);
+
+	kdbus_queue_entry_unlink(entry);
+	kdbus_reply_unref(entry->reply);
+
+	if (entry->slice) {
+		kdbus_conn_quota_dec(entry->conn, entry->user,
+				     kdbus_pool_slice_size(entry->slice),
+				     entry->gaps ? entry->gaps->n_fds : 0);
+		kdbus_pool_slice_release(entry->slice);
+	}
+
+	kdbus_user_unref(entry->user);
+	kdbus_gaps_unref(entry->gaps);
+	kdbus_conn_unref(entry->conn);
+	kfree(entry);
+}
+
+/**
+ * kdbus_queue_entry_install() - install message components into the
+ *				 receiver's process
+ * @entry:		The queue entry to install
+ * @return_flags:	Pointer to store the return flags for userspace
+ * @install_fds:	Whether or not to install associated file descriptors
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_queue_entry_install(struct kdbus_queue_entry *entry,
+			      u64 *return_flags, bool install_fds)
+{
+	bool incomplete_fds = false;
+	int ret;
+
+	lockdep_assert_held(&entry->conn->lock);
+
+	ret = kdbus_gaps_install(entry->gaps, entry->slice, &incomplete_fds);
+	if (ret < 0)
+		return ret;
+
+	if (incomplete_fds)
+		*return_flags |= KDBUS_RECV_RETURN_INCOMPLETE_FDS;
+	return 0;
+}
+
+/**
+ * kdbus_queue_entry_enqueue() - enqueue an entry
+ * @entry:		entry to enqueue
+ * @reply:		reply to link to this entry (or NULL if none)
+ *
+ * This enqueues an unqueued entry into the message queue of the linked
+ * connection. It also binds a reply object to the entry so we can remember it
+ * when the message is moved.
+ *
+ * Once this call returns (and the connection lock is released), this entry can
+ * be dequeued by the target connection. Note that the entry will not be removed
+ * from the queue until it is destroyed.
+ */
+void kdbus_queue_entry_enqueue(struct kdbus_queue_entry *entry,
+			       struct kdbus_reply *reply)
+{
+	lockdep_assert_held(&entry->conn->lock);
+
+	if (WARN_ON(entry->reply) || WARN_ON(!list_empty(&entry->entry)))
+		return;
+
+	entry->reply = kdbus_reply_ref(reply);
+	kdbus_queue_entry_link(entry);
+}
+
+/**
+ * kdbus_queue_entry_move() - move queue entry
+ * @e:		queue entry to move
+ * @dst:	destination connection to queue the entry on
+ *
+ * This moves a queue entry onto a different connection. It allocates a new
+ * slice on the target connection and copies the message over. If the copy
+ * succeeds, the entry is moved from its current connection to @dst.
+ *
+ * On failure, the entry is left untouched.
+ *
+ * The queue entry must currently be queued; after the call succeeds, it is
+ * queued on the destination and no longer on the source.
+ *
+ * The caller must hold the connection lock of the source *and* destination.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_queue_entry_move(struct kdbus_queue_entry *e,
+			   struct kdbus_conn *dst)
+{
+	struct kdbus_pool_slice *slice = NULL;
+	struct kdbus_conn *src = e->conn;
+	size_t size, fds;
+	int ret;
+
+	lockdep_assert_held(&src->lock);
+	lockdep_assert_held(&dst->lock);
+
+	if (WARN_ON(list_empty(&e->entry)))
+		return -EINVAL;
+	if (src == dst)
+		return 0;
+
+	size = kdbus_pool_slice_size(e->slice);
+	fds = e->gaps ? e->gaps->n_fds : 0;
+
+	ret = kdbus_conn_quota_inc(dst, e->user, size, fds);
+	if (ret < 0)
+		return ret;
+
+	slice = kdbus_pool_slice_alloc(dst->pool, size, true);
+	if (IS_ERR(slice)) {
+		ret = PTR_ERR(slice);
+		slice = NULL;
+		goto error;
+	}
+
+	ret = kdbus_pool_slice_copy(slice, e->slice);
+	if (ret < 0)
+		goto error;
+
+	kdbus_queue_entry_unlink(e);
+	kdbus_conn_quota_dec(src, e->user, size, fds);
+	kdbus_pool_slice_release(e->slice);
+	kdbus_conn_unref(e->conn);
+
+	e->slice = slice;
+	e->conn = kdbus_conn_ref(dst);
+	kdbus_queue_entry_link(e);
+
+	return 0;
+
+error:
+	kdbus_pool_slice_release(slice);
+	kdbus_conn_quota_dec(dst, e->user, size, fds);
+	return ret;
+}
diff -Nur linux-4.2/ipc/kdbus/queue.h linux-4.2-pck/ipc/kdbus/queue.h
--- linux-4.2/ipc/kdbus/queue.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/queue.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_QUEUE_H
+#define __KDBUS_QUEUE_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct kdbus_conn;
+struct kdbus_pool_slice;
+struct kdbus_reply;
+struct kdbus_staging;
+struct kdbus_user;
+
+/**
+ * struct kdbus_queue - a connection's message queue
+ * @msg_list:		List head for kdbus_queue_entry objects
+ * @msg_prio_queue:	RB tree root for messages, sorted by priority
+ * @msg_prio_highest:	Link to the RB node referencing the message with the
+ *			highest priority in the tree.
+ */
+struct kdbus_queue {
+	struct list_head msg_list;
+	struct rb_root msg_prio_queue;
+	struct rb_node *msg_prio_highest;
+};
+
+/**
+ * struct kdbus_queue_entry - messages waiting to be read
+ * @entry:		Entry in the connection's list
+ * @prio_node:		Entry in the priority queue tree
+ * @prio_entry:		Queue tree node entry in the list of one priority
+ * @priority:		Message priority
+ * @dst_name_id:	The sequence number of the name this message is
+ *			addressed to, 0 for messages sent to an ID
+ * @conn:		Connection this entry is queued on
+ * @gaps:		Gaps object to fill message gaps at RECV time
+ * @user:		User used for accounting
+ * @slice:		Slice in the receiver's pool for the message
+ * @reply:		The reply block if a reply to this message is expected
+ */
+struct kdbus_queue_entry {
+	struct list_head entry;
+	struct rb_node prio_node;
+	struct list_head prio_entry;
+
+	s64 priority;
+	u64 dst_name_id;
+
+	struct kdbus_conn *conn;
+	struct kdbus_gaps *gaps;
+	struct kdbus_user *user;
+	struct kdbus_pool_slice *slice;
+	struct kdbus_reply *reply;
+};
+
+void kdbus_queue_init(struct kdbus_queue *queue);
+struct kdbus_queue_entry *kdbus_queue_peek(struct kdbus_queue *queue,
+					   s64 priority, bool use_priority);
+
+struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *src,
+						struct kdbus_conn *dst,
+						struct kdbus_staging *s);
+void kdbus_queue_entry_free(struct kdbus_queue_entry *entry);
+int kdbus_queue_entry_install(struct kdbus_queue_entry *entry,
+			      u64 *return_flags, bool install_fds);
+void kdbus_queue_entry_enqueue(struct kdbus_queue_entry *entry,
+			       struct kdbus_reply *reply);
+int kdbus_queue_entry_move(struct kdbus_queue_entry *entry,
+			   struct kdbus_conn *dst);
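+
+/*
+ * Example: life cycle of a queue entry on the send path. A staging object is
+ * turned into an entry on the destination's queue; at RECV time the receiver
+ * peeks the entry, installs any file descriptors and frees it. This is only
+ * a simplified sketch; locking, quota handling and the reply object are
+ * omitted.
+ *
+ *	struct kdbus_queue_entry *entry;
+ *
+ *	entry = kdbus_queue_entry_new(src, dst, staging);
+ *	if (IS_ERR(entry))
+ *		return PTR_ERR(entry);
+ *	kdbus_queue_entry_enqueue(entry, NULL);
+ *
+ *	(later, on KDBUS_CMD_RECV)
+ *	entry = kdbus_queue_peek(&dst->queue, 0, false);
+ *	kdbus_queue_entry_install(entry, &return_flags, true);
+ *	kdbus_queue_entry_free(entry);
+ */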
+
+#endif /* __KDBUS_QUEUE_H */
diff -Nur linux-4.2/ipc/kdbus/reply.c linux-4.2-pck/ipc/kdbus/reply.c
--- linux-4.2/ipc/kdbus/reply.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/reply.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,252 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+
+#include "bus.h"
+#include "connection.h"
+#include "endpoint.h"
+#include "message.h"
+#include "metadata.h"
+#include "names.h"
+#include "domain.h"
+#include "item.h"
+#include "notify.h"
+#include "policy.h"
+#include "reply.h"
+#include "util.h"
+
+/**
+ * kdbus_reply_new() - Allocate and set up a new kdbus_reply object
+ * @reply_src:		The connection a reply is expected from
+ * @reply_dst:		The connection this reply object belongs to
+ * @msg:		Message associated with the reply
+ * @name_entry:		Name entry used to send the message
+ * @sync:		Whether or not to make this reply synchronous
+ *
+ * Allocate and fill a new kdbus_reply object.
+ *
+ * Return: New kdbus_reply object on success, ERR_PTR on error.
+ */
+struct kdbus_reply *kdbus_reply_new(struct kdbus_conn *reply_src,
+				    struct kdbus_conn *reply_dst,
+				    const struct kdbus_msg *msg,
+				    struct kdbus_name_entry *name_entry,
+				    bool sync)
+{
+	struct kdbus_reply *r;
+	int ret;
+
+	if (atomic_inc_return(&reply_dst->request_count) >
+	    KDBUS_CONN_MAX_REQUESTS_PENDING) {
+		ret = -EMLINK;
+		goto exit_dec_request_count;
+	}
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r) {
+		ret = -ENOMEM;
+		goto exit_dec_request_count;
+	}
+
+	kref_init(&r->kref);
+	INIT_LIST_HEAD(&r->entry);
+	r->reply_src = kdbus_conn_ref(reply_src);
+	r->reply_dst = kdbus_conn_ref(reply_dst);
+	r->cookie = msg->cookie;
+	r->name_id = name_entry ? name_entry->name_id : 0;
+	r->deadline_ns = msg->timeout_ns;
+
+	if (sync) {
+		r->sync = true;
+		r->waiting = true;
+	}
+
+	return r;
+
+exit_dec_request_count:
+	atomic_dec(&reply_dst->request_count);
+	return ERR_PTR(ret);
+}
+
+static void __kdbus_reply_free(struct kref *kref)
+{
+	struct kdbus_reply *reply =
+		container_of(kref, struct kdbus_reply, kref);
+
+	atomic_dec(&reply->reply_dst->request_count);
+	kdbus_conn_unref(reply->reply_src);
+	kdbus_conn_unref(reply->reply_dst);
+	kfree(reply);
+}
+
+/**
+ * kdbus_reply_ref() - Increase reference on kdbus_reply
+ * @r:		The reply, may be %NULL
+ *
+ * Return: The reply object with an extra reference
+ */
+struct kdbus_reply *kdbus_reply_ref(struct kdbus_reply *r)
+{
+	if (r)
+		kref_get(&r->kref);
+	return r;
+}
+
+/**
+ * kdbus_reply_unref() - Decrease reference on kdbus_reply
+ * @r:		The reply, may be %NULL
+ *
+ * Return: NULL
+ */
+struct kdbus_reply *kdbus_reply_unref(struct kdbus_reply *r)
+{
+	if (r)
+		kref_put(&r->kref, __kdbus_reply_free);
+	return NULL;
+}
+
+/**
+ * kdbus_reply_link() - Link reply object into target connection
+ * @r:		Reply to link
+ */
+void kdbus_reply_link(struct kdbus_reply *r)
+{
+	if (WARN_ON(!list_empty(&r->entry)))
+		return;
+
+	list_add(&r->entry, &r->reply_dst->reply_list);
+	kdbus_reply_ref(r);
+}
+
+/**
+ * kdbus_reply_unlink() - Unlink reply object from target connection
+ * @r:		Reply to unlink
+ */
+void kdbus_reply_unlink(struct kdbus_reply *r)
+{
+	if (!list_empty(&r->entry)) {
+		list_del_init(&r->entry);
+		kdbus_reply_unref(r);
+	}
+}
+
+/**
+ * kdbus_sync_reply_wakeup() - Wake a synchronously blocking reply
+ * @reply:	The reply object
+ * @err:	Error code to set on the remote side
+ *
+ * Wake up remote peer (method origin) with the appropriate synchronous reply
+ * code.
+ */
+void kdbus_sync_reply_wakeup(struct kdbus_reply *reply, int err)
+{
+	if (WARN_ON(!reply->sync))
+		return;
+
+	reply->waiting = false;
+	reply->err = err;
+	wake_up_interruptible(&reply->reply_dst->wait);
+}
+
+/**
+ * kdbus_reply_find() - Find the corresponding reply object
+ * @replying:	The replying connection or NULL
+ * @reply_dst:	The connection the reply will be sent to
+ *		(method origin)
+ * @cookie:	The cookie of the requesting message
+ *
+ * Look up a reply object that should be sent as a reply by
+ * @replying to @reply_dst with the given cookie.
+ *
+ * Callers must take the @reply_dst lock.
+ *
+ * Return: the corresponding reply object or NULL if not found
+ */
+struct kdbus_reply *kdbus_reply_find(struct kdbus_conn *replying,
+				     struct kdbus_conn *reply_dst,
+				     u64 cookie)
+{
+	struct kdbus_reply *r;
+
+	list_for_each_entry(r, &reply_dst->reply_list, entry) {
+		if (r->cookie == cookie &&
+		    (!replying || r->reply_src == replying))
+			return r;
+	}
+
+	return NULL;
+}
+
+/**
+ * kdbus_reply_list_scan_work() - Worker callback to scan the replies of a
+ *				  connection for exceeded timeouts
+ * @work:		Work struct of the connection to scan
+ *
+ * Walk the list of replies stored with a connection and look for entries
+ * that have exceeded their timeout. If such an entry is found, a timeout
+ * notification is sent to the waiting peer, and the reply is removed from
+ * the list.
+ *
+ * The work is rescheduled to the nearest timeout found during the list
+ * iteration.
+ */
+void kdbus_reply_list_scan_work(struct work_struct *work)
+{
+	struct kdbus_conn *conn =
+		container_of(work, struct kdbus_conn, work.work);
+	struct kdbus_reply *reply, *reply_tmp;
+	u64 deadline = ~0ULL;
+	u64 now;
+
+	now = ktime_get_ns();
+
+	mutex_lock(&conn->lock);
+	if (!kdbus_conn_active(conn)) {
+		mutex_unlock(&conn->lock);
+		return;
+	}
+
+	list_for_each_entry_safe(reply, reply_tmp, &conn->reply_list, entry) {
+		/*
+		 * If the reply block is waiting for synchronous I/O,
+		 * the timeout is handled by wait_event_*_timeout(),
+		 * so we don't have to care for it here.
+		 */
+		if (reply->sync && !reply->interrupted)
+			continue;
+
+		WARN_ON(reply->reply_dst != conn);
+
+		if (reply->deadline_ns > now) {
+			/* remember next timeout */
+			if (deadline > reply->deadline_ns)
+				deadline = reply->deadline_ns;
+
+			continue;
+		}
+
+		/*
+		 * A zero deadline means the connection died, was
+		 * cleaned up already and the notification was sent.
+		 * Don't send notifications for reply trackers that were
+		 * left in an interrupted syscall state.
+		 */
+		if (reply->deadline_ns != 0 && !reply->interrupted)
+			kdbus_notify_reply_timeout(conn->ep->bus, conn->id,
+						   reply->cookie);
+
+		kdbus_reply_unlink(reply);
+	}
+
+	/* rearm delayed work with next timeout */
+	if (deadline != ~0ULL)
+		schedule_delayed_work(&conn->work,
+				      nsecs_to_jiffies(deadline - now));
+
+	mutex_unlock(&conn->lock);
+
+	kdbus_notify_flush(conn->ep->bus);
+}
diff -Nur linux-4.2/ipc/kdbus/reply.h linux-4.2-pck/ipc/kdbus/reply.h
--- linux-4.2/ipc/kdbus/reply.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/reply.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_REPLY_H
+#define __KDBUS_REPLY_H
+
+/**
+ * struct kdbus_reply - an entry of kdbus_conn's list of replies
+ * @kref:		Ref-count of this object
+ * @entry:		The entry of the connection's reply_list
+ * @reply_src:		The connection the reply will be sent from
+ * @reply_dst:		The connection the reply will be sent to
+ * @queue_entry:	The queue entry item that is prepared by the replying
+ *			connection
+ * @deadline_ns:	The deadline of the reply, in nanoseconds
+ * @cookie:		The cookie of the requesting message
+ * @name_id:		ID of the well-known name the original msg was sent to
+ * @sync:		The reply block is waiting for synchronous I/O
+ * @waiting:		The condition to synchronously wait for
+ * @interrupted:	The sync reply was left in an interrupted state
+ * @err:		The error code for the synchronous reply
+ */
+struct kdbus_reply {
+	struct kref kref;
+	struct list_head entry;
+	struct kdbus_conn *reply_src;
+	struct kdbus_conn *reply_dst;
+	struct kdbus_queue_entry *queue_entry;
+	u64 deadline_ns;
+	u64 cookie;
+	u64 name_id;
+	bool sync:1;
+	bool waiting:1;
+	bool interrupted:1;
+	int err;
+};
+
+struct kdbus_reply *kdbus_reply_new(struct kdbus_conn *reply_src,
+				    struct kdbus_conn *reply_dst,
+				    const struct kdbus_msg *msg,
+				    struct kdbus_name_entry *name_entry,
+				    bool sync);
+
+struct kdbus_reply *kdbus_reply_ref(struct kdbus_reply *r);
+struct kdbus_reply *kdbus_reply_unref(struct kdbus_reply *r);
+
+void kdbus_reply_link(struct kdbus_reply *r);
+void kdbus_reply_unlink(struct kdbus_reply *r);
+
+struct kdbus_reply *kdbus_reply_find(struct kdbus_conn *replying,
+				     struct kdbus_conn *reply_dst,
+				     u64 cookie);
+
+void kdbus_sync_reply_wakeup(struct kdbus_reply *reply, int err);
+void kdbus_reply_list_scan_work(struct work_struct *work);
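+
+/*
+ * Example: how reply objects tie a method call to its expected reply. When a
+ * message that expects a reply is sent, a reply object is created and linked
+ * on the caller; when the callee answers, the object is looked up by cookie
+ * and unlinked again. This is only a simplified sketch; locking and the
+ * surrounding send logic are omitted.
+ *
+ *	struct kdbus_reply *r;
+ *
+ *	r = kdbus_reply_new(callee, caller, msg, name_entry, false);
+ *	if (IS_ERR(r))
+ *		return PTR_ERR(r);
+ *	kdbus_reply_link(r);
+ *
+ *	(later, when the answer arrives)
+ *	r = kdbus_reply_find(callee, caller, reply_msg->cookie_reply);
+ *	if (r)
+ *		kdbus_reply_unlink(r);
+ */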
+
+#endif /* __KDBUS_REPLY_H */
diff -Nur linux-4.2/ipc/kdbus/util.c linux-4.2-pck/ipc/kdbus/util.c
--- linux-4.2/ipc/kdbus/util.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/util.c	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/cred.h>
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <linux/user_namespace.h>
+
+#include "limits.h"
+#include "util.h"
+
+/**
+ * kdbus_copy_from_user() - copy aligned data from user-space
+ * @dest:	target buffer in kernel memory
+ * @user_ptr:	user-provided source buffer
+ * @size:	memory size to copy from user
+ *
+ * This copies @size bytes from @user_ptr into the kernel, just like
+ * copy_from_user() does, but additionally enforces 8-byte alignment and
+ * rejects any unaligned user-space pointer.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kdbus_copy_from_user(void *dest, void __user *user_ptr, size_t size)
+{
+	if (!KDBUS_IS_ALIGNED8((uintptr_t)user_ptr))
+		return -EFAULT;
+
+	if (copy_from_user(dest, user_ptr, size))
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * kdbus_verify_uid_prefix() - verify UID prefix of a user-supplied name
+ * @name:	user-supplied name to verify
+ * @user_ns:	user-namespace to act in
+ * @kuid:	Kernel internal uid of user
+ *
+ * This verifies that the user-supplied name @name is prefixed with the user's
+ * UID. This is the default name-spacing policy we enforce on user-supplied
+ * names for public kdbus entities like buses and endpoints.
+ *
+ * The user must supply names prefixed with "<UID>-", where the UID is
+ * interpreted in the user-namespace of the domain (for example, a user with
+ * UID 1000 would have to use names like "1000-mybus"). If the user fails to
+ * supply such a prefixed name, we reject it.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int kdbus_verify_uid_prefix(const char *name, struct user_namespace *user_ns,
+			    kuid_t kuid)
+{
+	uid_t uid;
+	char prefix[16];
+
+	/*
+	 * The kuid must have a mapping into the userns of the domain;
+	 * otherwise, do not allow creation of buses or endpoints.
+	 */
+	uid = from_kuid(user_ns, kuid);
+	if (uid == (uid_t) -1)
+		return -EINVAL;
+
+	snprintf(prefix, sizeof(prefix), "%u-", uid);
+	if (strncmp(name, prefix, strlen(prefix)) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * kdbus_sanitize_attach_flags() - Sanitize attach flags from user-space
+ * @flags:		Attach flags provided by userspace
+ * @attach_flags:	A pointer where to store the valid attach flags
+ *
+ * Convert attach-flags provided by user-space into a valid mask. If the mask
+ * is invalid, an error is returned. The sanitized attach flags are stored in
+ * the output parameter.
+ *
+ * Return: 0 on success, negative error on failure.
+ */
+int kdbus_sanitize_attach_flags(u64 flags, u64 *attach_flags)
+{
+	/* 'any' degrades to 'all' for compatibility */
+	if (flags == _KDBUS_ATTACH_ANY)
+		flags = _KDBUS_ATTACH_ALL;
+
+	/* reject unknown attach flags */
+	if (flags & ~_KDBUS_ATTACH_ALL)
+		return -EINVAL;
+
+	*attach_flags = flags;
+	return 0;
+}
+
+/**
+ * kdbus_kvec_set - helper utility to assemble kvec arrays
+ * @kvec:	kvec entry to use
+ * @src:	Source address to set in @kvec
+ * @len:	Number of bytes in @src
+ * @total_len:	Pointer to total length variable
+ *
+ * Set @src and @len in @kvec, and increase @total_len by @len.
+ */
+void kdbus_kvec_set(struct kvec *kvec, void *src, size_t len, u64 *total_len)
+{
+	kvec->iov_base = src;
+	kvec->iov_len = len;
+	*total_len += len;
+}
+
+static const char * const zeros = "\0\0\0\0\0\0\0";
+
+/**
+ * kdbus_kvec_pad - conditionally write a padding kvec
+ * @kvec:	kvec entry to use
+ * @len:	Total length used for kvec array
+ *
+ * Check if the current total byte length of the array in @len is aligned to
+ * 8 bytes. If it isn't, fill @kvec with padding information and increase @len
+ * by the number of bytes stored in @kvec.
+ *
+ * Return: the number of added padding bytes.
+ */
+size_t kdbus_kvec_pad(struct kvec *kvec, u64 *len)
+{
+	size_t pad = KDBUS_ALIGN8(*len) - *len;
+
+	if (!pad)
+		return 0;
+
+	kvec->iov_base = (void *)zeros;
+	kvec->iov_len = pad;
+
+	*len += pad;
+
+	return pad;
+}
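+
+/*
+ * Example: assembling an 8-byte aligned stream with the kvec helpers. Each
+ * element is added with kdbus_kvec_set(), and kdbus_kvec_pad() appends the
+ * zero padding needed to keep the running length aligned before the data is
+ * copied into a pool slice. Sketch only; hdr and payload stand in for
+ * arbitrary kernel buffers.
+ *
+ *	struct kvec kvec[3];
+ *	u64 len = 0;
+ *	size_t cnt = 0;
+ *
+ *	kdbus_kvec_set(&kvec[cnt++], &hdr, sizeof(hdr), &len);
+ *	kdbus_kvec_set(&kvec[cnt++], payload, payload_len, &len);
+ *	cnt += !!kdbus_kvec_pad(&kvec[cnt], &len);
+ *
+ *	ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, len);
+ */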
diff -Nur linux-4.2/ipc/kdbus/util.h linux-4.2-pck/ipc/kdbus/util.h
--- linux-4.2/ipc/kdbus/util.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/ipc/kdbus/util.h	2015-09-08 00:48:22.238363406 -0300
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright (C) 2013-2015 Linux Foundation
+ * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef __KDBUS_UTIL_H
+#define __KDBUS_UTIL_H
+
+#include <linux/dcache.h>
+#include <linux/ioctl.h>
+
+#include <uapi/linux/kdbus.h>
+
+/* all exported addresses are 64 bit */
+#define KDBUS_PTR(addr) ((void __user *)(uintptr_t)(addr))
+
+/* all exported sizes are 64 bit and data aligned to 64 bit */
+#define KDBUS_ALIGN8(s) ALIGN((s), 8)
+#define KDBUS_IS_ALIGNED8(s) (IS_ALIGNED(s, 8))
+
+/**
+ * kdbus_member_set_user - write a structure member to user memory
+ * @_s:		Variable to copy from
+ * @_b:		Buffer to write to
+ * @_t:		Structure type
+ * @_m:		Member name in the passed structure
+ *
+ * Return: the result of copy_to_user()
+ */
+#define kdbus_member_set_user(_s, _b, _t, _m)				\
+({									\
+	u64 __user *_sz =						\
+		(void __user *)((u8 __user *)(_b) + offsetof(_t, _m));	\
+	copy_to_user(_sz, _s, FIELD_SIZEOF(_t, _m));			\
+})
+
+/**
+ * kdbus_strhash - calculate a hash
+ * @str:	String
+ *
+ * Return: hash value
+ */
+static inline unsigned int kdbus_strhash(const char *str)
+{
+	unsigned long hash = init_name_hash();
+
+	while (*str)
+		hash = partial_name_hash(*str++, hash);
+
+	return end_name_hash(hash);
+}
+
+int kdbus_verify_uid_prefix(const char *name, struct user_namespace *user_ns,
+			    kuid_t kuid);
+int kdbus_sanitize_attach_flags(u64 flags, u64 *attach_flags);
+
+int kdbus_copy_from_user(void *dest, void __user *user_ptr, size_t size);
+
+struct kvec;
+
+void kdbus_kvec_set(struct kvec *kvec, void *src, size_t len, u64 *total_len);
+size_t kdbus_kvec_pad(struct kvec *kvec, u64 *len);
+
+#endif
diff -Nur linux-4.2/ipc/shm.c linux-4.2-pck/ipc/shm.c
--- linux-4.2/ipc/shm.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/ipc/shm.c	2015-09-08 11:20:32.423680758 -0300
@@ -545,7 +545,7 @@
 		if  ((shmflg & SHM_NORESERVE) &&
 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
 			acctflag = VM_NORESERVE;
-		file = shmem_kernel_file_setup(name, size, acctflag);
+		file = shmem_kernel_file_setup(name, size, acctflag, 0);
 	}
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
diff -Nur linux-4.2/kernel/debug/kdb/kdb_io.c linux-4.2-pck/kernel/debug/kdb/kdb_io.c
--- linux-4.2/kernel/debug/kdb/kdb_io.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/debug/kdb/kdb_io.c	2015-09-08 00:48:22.238363406 -0300
@@ -710,7 +710,7 @@
 			}
 		}
 		while (c) {
-			c->write(c, cp, retlen - (cp - kdb_buffer));
+			c->write(c, cp, retlen - (cp - kdb_buffer), 7); /* 7 == KERN_DEBUG */
 			touch_nmi_watchdog();
 			c = c->next;
 		}
@@ -774,7 +774,7 @@
 			}
 		}
 		while (c) {
-			c->write(c, moreprompt, strlen(moreprompt));
+			c->write(c, moreprompt, strlen(moreprompt), 7); /* 7 == KERN_DEBUG */
 			touch_nmi_watchdog();
 			c = c->next;
 		}
diff -Nur linux-4.2/kernel/fork.c linux-4.2-pck/kernel/fork.c
--- linux-4.2/kernel/fork.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/fork.c	2015-09-08 11:20:32.430347100 -0300
@@ -138,7 +138,7 @@
 
 static inline struct task_struct *alloc_task_struct_node(int node)
 {
-	return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+	return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL | ___GFP_TOI_NOTRACK, node);
 }
 
 static inline void free_task_struct(struct task_struct *tsk)
@@ -443,7 +443,7 @@
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		tmp = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -494,7 +494,7 @@
 		__vma_link_rb(mm, tmp, rb_link, rb_parent);
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
-
+		uksm_vma_add_new(tmp);
 		mm->map_count++;
 		retval = copy_page_range(mm, oldmm, mpnt);
 
diff -Nur linux-4.2/kernel/kthread.c linux-4.2-pck/kernel/kthread.c
--- linux-4.2/kernel/kthread.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/kthread.c	2015-09-08 11:20:32.440346612 -0300
@@ -274,7 +274,7 @@
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct task_struct *task;
 	struct kthread_create_info *create = kmalloc(sizeof(*create),
-						     GFP_KERNEL);
+						     GFP_KERNEL | ___GFP_TOI_NOTRACK);
 
 	if (!create)
 		return ERR_PTR(-ENOMEM);
diff -Nur linux-4.2/kernel/power/Kconfig linux-4.2-pck/kernel/power/Kconfig
--- linux-4.2/kernel/power/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/power/Kconfig	2015-09-08 11:20:32.450346125 -0300
@@ -91,6 +91,284 @@
 	  suspended image to. It will simply pick the first available swap 
 	  device.
 
+menuconfig TOI_CORE
+	bool "Enhanced Hibernation (TuxOnIce)"
+	depends on HIBERNATION
+	default y
+	---help---
+	  TuxOnIce is the 'new and improved' suspend support.
+
+	  See the TuxOnIce home page (tuxonice.net)
+	  for FAQs, HOWTOs and other documentation.
+
+	comment "Image Storage (you need at least one allocator)"
+		depends on TOI_CORE
+
+	config TOI_FILE
+		bool "File Allocator"
+		depends on TOI_CORE
+		default y
+		---help---
+		  This option enables support for storing an image in a
+		  simple file. You might want this if your swap is
+		  sometimes full enough that you don't have enough spare
+		  space to store an image.
+
+	config TOI_SWAP
+		bool "Swap Allocator"
+		depends on TOI_CORE && SWAP
+		default y
+		---help---
+		  This option enables support for storing an image in your
+		  swap space.
+
+	comment "General Options"
+		depends on TOI_CORE
+
+	config TOI_PRUNE
+		bool "Image pruning support"
+		depends on TOI_CORE && CRYPTO && BROKEN
+		default y
+		---help---
+		  This option adds support for using cryptoapi hashing
+		  algorithms to identify pages with the same content. We
+		  then write a much smaller pointer to the first copy of
+		  the data instead of a complete (perhaps compressed)
+                  additional copy.
+
+		  You probably want this, so say Y here.
+
+	comment "No image pruning support available without Cryptoapi support."
+		depends on TOI_CORE && !CRYPTO
+
+	config TOI_CRYPTO
+		bool "Compression support"
+		depends on TOI_CORE && CRYPTO
+		default y
+		---help---
+		  This option adds support for using cryptoapi compression
+		  algorithms. Compression is particularly useful as it can
+		  more than double your suspend and resume speed (depending
+		  upon how well your image compresses).
+
+		  You probably want this, so say Y here.
+
+	comment "No compression support available without Cryptoapi support."
+		depends on TOI_CORE && !CRYPTO
+
+	config TOI_USERUI
+		bool "Userspace User Interface support"
+		depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
+		default y
+		---help---
+		  This option enables support for a userspace-based user interface
+		  to TuxOnIce, which allows you to have a nice display while suspending
+		  and resuming, and also enables features such as pressing escape to
+		  cancel a cycle or interactive debugging.
+
+	config TOI_USERUI_DEFAULT_PATH
+		string "Default userui program location"
+		default "/usr/local/sbin/tuxoniceui_text"
+		depends on TOI_USERUI
+		---help---
+		  This entry allows you to specify a default path to the userui binary.
+
+	config TOI_DEFAULT_IMAGE_SIZE_LIMIT
+		int "Default image size limit"
+		range -2 65536 
+		default "-2"
+		depends on TOI_CORE
+		---help---
+		  This entry allows you to specify a default image size limit. It can
+		  be overridden at run-time using /sys/power/tuxonice/image_size_limit.
+
+	config TOI_KEEP_IMAGE
+		bool "Allow Keep Image Mode"
+		depends on TOI_CORE
+		---help---
+		  This option allows you to keep an image and reuse it. It is intended
+		  __ONLY__ for use with systems where all filesystems are mounted read-
+		  only (kiosks, for example). To use it, compile this option in and boot
+		  normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
+		  When you resume, the image will not be removed. You will be unable to turn
+		  off swap partitions (assuming you are using the swap allocator), but future
+		  suspends simply do a power-down. The image can be updated using the
+		  kernel command line parameter suspend_act= to turn off the keep image
+		  bit. Keep image mode is a little less user friendly on purpose - it
+		  should not be used without thought!
+
+	config TOI_INCREMENTAL
+		bool "Incremental Image Support"
+		depends on TOI_CORE && 64BIT && TOI_KEEP_IMAGE
+		default n
+		---help---
+		  This option enables the work in progress toward using the dirty page
+		  tracking to record changes to pages. It is hoped that
+		  this will be an initial step toward implementing storing just
+		  the differences between consecutive images, which will
+		  reduce the amount of storage needed for the image, but also
+		  increase the speed at which writing an image occurs and
+		  reduce the wear and tear on drives.
+
+		  At the moment, all that is implemented is the first step of keeping
+		  an existing image and then comparing it to the contents in memory
+		  (by setting /sys/power/tuxonice/verify_image to 1 and triggering a
+		  (fake) resume) to see what the page change tracking should find to be
+		  different. If you have verify_image set to 1, TuxOnIce will automatically
+		  invalidate the old image when you next try to hibernate, so there's no
+		  greater chance of disk corruption than normal.
+
+	comment "No incremental image support available without Keep Image support."
+		depends on TOI_CORE && !TOI_KEEP_IMAGE && 64BIT
+
+	config TOI_REPLACE_SWSUSP
+		bool "Replace swsusp by default"
+		default y
+		depends on TOI_CORE
+		---help---
+		  TuxOnIce can replace swsusp. This option makes that the default state,
+		  requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
+		  to use the vanilla kernel functionality. Note that your initrd/ramfs will
+		  need to do this before trying to resume, too.
+		  With overriding swsusp enabled, echoing 'disk' to /sys/power/state will
+		  start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
+		  the swap and file allocators are compiled in, the swap allocator will be
+		  used by default.
+
+	config TOI_IGNORE_LATE_INITCALL
+		bool "Wait for initrd/ramfs to run, by default"
+		default n
+		depends on TOI_CORE
+		---help---
+		  When booting, TuxOnIce can check for an image and start to resume prior
+		  to any initrd/ramfs running (via a late initcall).
+
+		  If you don't have an initrd/ramfs, this is what you want to happen -
+		  otherwise you won't be able to safely resume. You should set this option
+		  to 'No'.
+
+		  If, however, you want your initrd/ramfs to run anyway before resuming,
+		  you need to tell TuxOnIce to ignore that earlier opportunity to resume.
+		  This can be done either by using this compile time option, or by
+		  overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
+
+		  Note that if TuxOnIce can't resume at the earlier opportunity, the
+		  value of this option won't matter - the initramfs/initrd (if any) will
+		  run anyway.
+
+	menuconfig TOI_CLUSTER
+		bool "Cluster support"
+		default n
+		depends on TOI_CORE && NET && BROKEN
+		---help---
+		  Support for linking multiple machines in a cluster so that they suspend
+		  and resume together.
+
+	config TOI_DEFAULT_CLUSTER_INTERFACE
+		string "Default cluster interface"
+		depends on TOI_CLUSTER
+		---help---
+		  The default interface on which to communicate with other nodes in
+		  the cluster.
+
+		  If no value is set here, cluster support will be disabled by default.
+
+	config TOI_DEFAULT_CLUSTER_KEY
+		string "Default cluster key"
+		default "Default"
+		depends on TOI_CLUSTER
+		---help---
+		  The default key used by this node. All nodes in the same cluster
+		  have the same key. Multiple clusters may coexist on the same lan
+		  by using different values for this key.
+
+	config TOI_CLUSTER_IMAGE_TIMEOUT
+		int "Timeout when checking for image"
+		default 15
+		depends on TOI_CLUSTER
+		---help---
+		  Timeout (seconds) before continuing to boot when waiting to see
+		  whether other nodes might have an image. Set to -1 to wait
+		  indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue
+		  booting sooner than this timeout.
+
+	config TOI_CLUSTER_WAIT_UNTIL_NODES
+		int "Nodes without image before continuing"
+		default 0
+		depends on TOI_CLUSTER
+		---help---
+		  When booting and no image is found, we wait to see if other nodes
+		  have an image before continuing to boot. This value lets us
+		  continue after seeing a certain number of nodes without an image,
+		  instead of continuing to wait for the timeout. Set to 0 to only
+		  use the timeout.
+
+	config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
+		string "Default pre-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called when starting to hibernate.
+
+	config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
+		string "Default post-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called after resuming from hibernation.
+
+	config TOI_DEFAULT_WAIT
+		int "Default waiting time for emergency boot messages"
+		default "25"
+		range -1 32768
+		depends on TOI_CORE
+		help
+		  TuxOnIce can display warnings very early in the process of resuming,
+		  if (for example) it appears that you have booted a kernel that doesn't
+		  match an image on disk. It can then give you the opportunity to either
+		  continue booting that kernel, or reboot the machine. This option can be
+		  used to control how long to wait in such circumstances. -1 means wait
+		  forever. 0 means don't wait at all (do the default action, which will
+		  generally be to continue booting and remove the image). Values of 1 or
+		  more indicate a number of seconds (up to 255) to wait before doing the
+		  default.
+
+	config  TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
+		int "Default extra pages allowance"
+		default "2000"
+		range 500 32768
+		depends on TOI_CORE
+		help
+		  This value controls the default for the allowance TuxOnIce makes for
+		  drivers to allocate extra memory during the atomic copy. The default
+		  value of 2000 will be okay in most cases. If you are using
+		  DRI, the easiest way to find what value to use is to try to hibernate
+		  and look at how many pages were actually needed in the sysfs entry
+		  /sys/power/tuxonice/debug_info (first number on the last line), adding
+		  a little extra because the value is not always the same.
+
+	config TOI_CHECKSUM
+		bool "Checksum pageset2"
+		default n
+		depends on TOI_CORE
+		select CRYPTO
+		select CRYPTO_ALGAPI
+		select CRYPTO_MD4
+		---help---
+		  Adds support for checksumming pageset2 pages, to ensure you really get an
+		  atomic copy. Since some filesystems (XFS especially) change metadata even
+		  when there's no other activity, we need this to check for pages that have
+		  been changed while we were saving the page cache. If your debugging output
+		  always says no pages were resaved, you may be able to safely disable this
+		  option.
+
+config TOI
+	bool
+	depends on TOI_CORE!=n
+	default y
+
+config TOI_ZRAM_SUPPORT
+	def_bool y
+	depends on TOI && ZRAM!=n
+
 config PM_SLEEP
 	def_bool y
 	depends on SUSPEND || HIBERNATE_CALLBACKS
diff -Nur linux-4.2/kernel/power/Makefile linux-4.2-pck/kernel/power/Makefile
--- linux-4.2/kernel/power/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/power/Makefile	2015-09-08 11:20:32.460345634 -0300
@@ -1,6 +1,38 @@
 
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG
 
+tuxonice_core-y := tuxonice_modules.o
+
+obj-$(CONFIG_TOI)		+= tuxonice_builtin.o
+obj-$(CONFIG_TOI_INCREMENTAL)   += tuxonice_incremental.o \
+    tuxonice_copy_before_write.o
+
+tuxonice_core-$(CONFIG_PM_DEBUG)	+= tuxonice_alloc.o
+
+# Compile these in after allocation debugging, if used.
+
+tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \
+		tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
+		tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
+		tuxonice_power_off.o tuxonice_atomic_copy.o
+
+tuxonice_core-$(CONFIG_TOI_CHECKSUM)	+= tuxonice_checksum.o
+
+tuxonice_core-$(CONFIG_NET)	+= tuxonice_storage.o tuxonice_netlink.o
+
+obj-$(CONFIG_TOI_CORE)		+= tuxonice_core.o
+obj-$(CONFIG_TOI_PRUNE)		+= tuxonice_prune.o
+obj-$(CONFIG_TOI_CRYPTO)	+= tuxonice_compress.o
+
+tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \
+		tuxonice_bio_signature.o
+
+obj-$(CONFIG_TOI_SWAP)		+= tuxonice_bio.o tuxonice_swap.o
+obj-$(CONFIG_TOI_FILE)		+= tuxonice_bio.o tuxonice_file.o
+obj-$(CONFIG_TOI_CLUSTER)	+= tuxonice_cluster.o
+
+obj-$(CONFIG_TOI_USERUI)	+= tuxonice_userui.o
+
 obj-y				+= qos.o
 obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP)	+= console.o
diff -Nur linux-4.2/kernel/power/hibernate.c linux-4.2-pck/kernel/power/hibernate.c
--- linux-4.2/kernel/power/hibernate.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/power/hibernate.c	2015-09-08 11:20:32.460345634 -0300
@@ -31,7 +31,7 @@
 #include <linux/ktime.h>
 #include <trace/events/power.h>
 
-#include "power.h"
+#include "tuxonice.h"
 
 
 static int nocompress;
@@ -39,7 +39,7 @@
 static int nohibernate;
 static int resume_wait;
 static unsigned int resume_delay;
-static char resume_file[256] = CONFIG_PM_STD_PARTITION;
+char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 __visible int in_suspend __nosavedata;
@@ -123,7 +123,7 @@
  * platform_begin - Call platform to start hibernation.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static int platform_begin(int platform_mode)
+int platform_begin(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->begin() : 0;
@@ -133,7 +133,7 @@
  * platform_end - Call platform to finish transition to the working state.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static void platform_end(int platform_mode)
+void platform_end(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->end();
@@ -147,7 +147,7 @@
  * if so configured, and return an error code if that fails.
  */
 
-static int platform_pre_snapshot(int platform_mode)
+int platform_pre_snapshot(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_snapshot() : 0;
@@ -162,7 +162,7 @@
  *
  * This routine is called on one CPU with interrupts disabled.
  */
-static void platform_leave(int platform_mode)
+void platform_leave(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->leave();
@@ -177,7 +177,7 @@
  *
  * This routine must be called after platform_prepare().
  */
-static void platform_finish(int platform_mode)
+void platform_finish(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->finish();
@@ -193,7 +193,7 @@
  * If the restore fails after this function has been called,
  * platform_restore_cleanup() must be called.
  */
-static int platform_pre_restore(int platform_mode)
+int platform_pre_restore(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_restore() : 0;
@@ -210,7 +210,7 @@
  * function must be called too, regardless of the result of
  * platform_pre_restore().
  */
-static void platform_restore_cleanup(int platform_mode)
+void platform_restore_cleanup(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->restore_cleanup();
@@ -220,7 +220,7 @@
  * platform_recover - Recover from a failure to suspend devices.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static void platform_recover(int platform_mode)
+void platform_recover(int platform_mode)
 {
 	if (platform_mode && hibernation_ops && hibernation_ops->recover)
 		hibernation_ops->recover();
@@ -648,6 +648,9 @@
 {
 	int error;
 
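+	/* Hand the whole cycle over to TuxOnIce when it is replacing swsusp. */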
+	if (test_action_state(TOI_REPLACE_SWSUSP))
+		return try_tuxonice_hibernate();
+
 	if (!hibernation_available()) {
 		pr_debug("PM: Hibernation not available.\n");
 		return -EPERM;
@@ -737,11 +740,19 @@
  * attempts to recover gracefully and make the kernel return to the normal mode
  * of operation.
  */
-static int software_resume(void)
+int software_resume(void)
 {
 	int error;
 	unsigned int flags;
 
+	resume_attempted = 1;
+
+	/*
+	 * We can't know whether we overrode swsusp until an image header (if
+	 * any) is loaded, so we make sure that both paths are tried.
+	 */
+	try_tuxonice_resume();
+
 	/*
 	 * If the user said "noresume".. bail out early.
 	 */
@@ -1128,6 +1139,7 @@
 static int __init noresume_setup(char *str)
 {
 	noresume = 1;
+	set_toi_state(TOI_NORESUME_SPECIFIED);
 	return 1;
 }
 
diff -Nur linux-4.2/kernel/power/power.h linux-4.2-pck/kernel/power/power.h
--- linux-4.2/kernel/power/power.h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/power/power.h	2015-09-08 11:20:32.460345634 -0300
@@ -36,8 +36,12 @@
 	return arch_hibernation_header_restore(info) ?
 			"architecture specific data" : NULL;
 }
+#else
+extern char *check_image_kernel(struct swsusp_info *info);
 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+extern int init_header(struct swsusp_info *info);
 
+extern char resume_file[256];
 /*
  * Keep some memory free so that I/O operations can succeed without paging
  * [Might this be more than 4 MB?]
@@ -77,6 +81,8 @@
 	.store	= _name##_store,		\
 }
 
+extern struct pbe *restore_pblist;
+
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
 /* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */
@@ -260,6 +266,31 @@
 }
 #endif
 
+extern struct page *saveable_page(struct zone *z, unsigned long p);
+#ifdef CONFIG_HIGHMEM
+struct page *saveable_highmem_page(struct zone *z, unsigned long p);
+#else
+static
+inline void *saveable_highmem_page(struct zone *z, unsigned long p)
+{
+	return NULL;
+}
+#endif
+
+#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
+extern struct list_head nosave_regions;
+
+/**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
 #ifdef CONFIG_PM_AUTOSLEEP
 
 /* kernel/power/autosleep.c */
@@ -286,3 +317,10 @@
 extern int pm_wake_unlock(const char *buf);
 
 #endif /* !CONFIG_PM_WAKELOCKS */
+
+#ifdef CONFIG_TOI
+unsigned long toi_get_nonconflicting_page(void);
+#define BM_END_OF_MAP	(~0UL)
+#else
+#define toi_get_nonconflicting_page() (0)
+#endif
diff -Nur linux-4.2/kernel/power/snapshot.c linux-4.2-pck/kernel/power/snapshot.c
--- linux-4.2/kernel/power/snapshot.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/power/snapshot.c	2015-09-08 11:20:32.460345634 -0300
@@ -36,6 +36,9 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
 #include "power.h"
 
 static int swsusp_page_is_free(struct page *);
@@ -98,6 +101,9 @@
 {
 	void *res;
 
+	if (toi_running)
+		return (void *) toi_get_nonconflicting_page();
+
 	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
 		while (res && swsusp_page_is_free(virt_to_page(res))) {
@@ -143,6 +149,11 @@
 
 	page = virt_to_page(addr);
 
+	if (toi_running) {
+		toi__free_page(29, page);
+		return;
+	}
+
 	swsusp_unset_page_forbidden(page);
 	if (clear_nosave_free)
 		swsusp_unset_page_free(page);
@@ -302,13 +313,15 @@
 	int node_bit;
 };
 
+#define BM_POSITION_SLOTS (NR_CPUS * 2)
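+/*
+ * Each bitmap now carries BM_POSITION_SLOTS cached positions rather than a
+ * single one, so that several users can work on the same bitmap without
+ * trampling each other's state: the bit get/set/test/clear helpers use the
+ * index they are passed directly, while memory_bm_next_pfn() adds NR_CPUS to
+ * its index so that iteration state stays separate from those lookups.
+ */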
+
 struct memory_bitmap {
 	struct list_head zones;
 	struct linked_page *p_list;	/* list of pages used to store zone
 					 * bitmap objects and bitmap block
 					 * objects
 					 */
-	struct bm_position cur;	/* most recently used bit position */
+	struct bm_position cur[BM_POSITION_SLOTS];    /* most recently used bit position */
 };
 
 /* Functions that operate on memory bitmaps */
@@ -473,16 +486,39 @@
 		free_image_page(node->data, clear_nosave_free);
 }
 
-static void memory_bm_position_reset(struct memory_bitmap *bm)
+void memory_bm_position_reset(struct memory_bitmap *bm)
 {
-	bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
+    int index;
+
+    for (index = 0; index < BM_POSITION_SLOTS; index++) {
+	bm->cur[index].zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
 				  list);
-	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
+	bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
 				  struct rtree_node, list);
-	bm->cur.node_pfn = 0;
-	bm->cur.node_bit = 0;
+	bm->cur[index].node_pfn = 0;
+	bm->cur[index].node_bit = 0;
+    }
 }
 
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+
+/**
+ *	memory_bm_clear - clear all bits in a memory bitmap
+ *	@bm: the bitmap to clear
+ *
+ *	Only run while single threaded - no locking is needed.
+ */
+void memory_bm_clear(struct memory_bitmap *bm)
+{
+    memory_bm_position_reset(bm);
+
+    while (memory_bm_next_pfn(bm, 0) != BM_END_OF_MAP) {
+        memory_bm_clear_current(bm, 0);
+    }
+
+    memory_bm_position_reset(bm);
+}
 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 
 struct mem_extent {
@@ -595,7 +631,8 @@
 	}
 
 	bm->p_list = ca.chain;
-	memory_bm_position_reset(bm);
+
+        memory_bm_position_reset(bm);
  Exit:
 	free_mem_extents(&mem_extents);
 	return error;
@@ -631,14 +668,24 @@
  *	It walks the radix tree to find the page which contains the bit for
  *	pfn and returns the bit position in **addr and *bit_nr.
  */
-static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
-			      void **addr, unsigned int *bit_nr)
+int memory_bm_find_bit(struct memory_bitmap *bm, int index,
+        unsigned long pfn, void **addr, unsigned int *bit_nr)
 {
 	struct mem_zone_bm_rtree *curr, *zone;
 	struct rtree_node *node;
 	int i, block_nr;
 
-	zone = bm->cur.zone;
+	if (!bm->cur[index].zone) {
+		/* This slot's cached position has not been set up yet. */
+		bm->cur[index].zone = list_entry(bm->zones.next,
+				struct mem_zone_bm_rtree, list);
+		bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
+				struct rtree_node, list);
+		bm->cur[index].node_pfn = 0;
+		bm->cur[index].node_bit = 0;
+	}
+
+	zone = bm->cur[index].zone;
 
 	if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
 		goto zone_found;
@@ -662,8 +709,8 @@
 	 * node for our pfn.
 	 */
 
-	node = bm->cur.node;
-	if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
+	node = bm->cur[index].node;
+	if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur[index].node_pfn)
 		goto node_found;
 
 	node      = zone->rtree;
@@ -680,9 +727,9 @@
 
 node_found:
 	/* Update last position */
-	bm->cur.zone = zone;
-	bm->cur.node = node;
-	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur[index].zone = zone;
+	bm->cur[index].node = node;
+	bm->cur[index].node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
 
 	/* Set return values */
 	*addr = node->data;
@@ -691,66 +738,66 @@
 	return 0;
 }
 
-static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	set_bit(bit, addr);
 }
 
-static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	if (!error)
 		set_bit(bit, addr);
 
 	return error;
 }
 
-static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	clear_bit(bit, addr);
 }
 
-static void memory_bm_clear_current(struct memory_bitmap *bm)
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index)
 {
 	int bit;
 
-	bit = max(bm->cur.node_bit - 1, 0);
-	clear_bit(bit, bm->cur.node->data);
+	bit = max(bm->cur[index].node_bit - 1, 0);
+	clear_bit(bit, bm->cur[index].node->data);
 }
 
-static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	return test_bit(bit, addr);
 }
 
-static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
+static bool memory_bm_pfn_present(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 
-	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
+	return !memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 }
 
 /*
@@ -763,25 +810,25 @@
  *
  *	Returns true if there is a next node, false otherwise.
  */
-static bool rtree_next_node(struct memory_bitmap *bm)
+static bool rtree_next_node(struct memory_bitmap *bm, int index)
 {
-	bm->cur.node = list_entry(bm->cur.node->list.next,
+	bm->cur[index].node = list_entry(bm->cur[index].node->list.next,
 				  struct rtree_node, list);
-	if (&bm->cur.node->list != &bm->cur.zone->leaves) {
-		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
-		bm->cur.node_bit  = 0;
+	if (&bm->cur[index].node->list != &bm->cur[index].zone->leaves) {
+		bm->cur[index].node_pfn += BM_BITS_PER_BLOCK;
+		bm->cur[index].node_bit  = 0;
 		touch_softlockup_watchdog();
 		return true;
 	}
 
 	/* No more nodes, goto next zone */
-	bm->cur.zone = list_entry(bm->cur.zone->list.next,
+	bm->cur[index].zone = list_entry(bm->cur[index].zone->list.next,
 				  struct mem_zone_bm_rtree, list);
-	if (&bm->cur.zone->list != &bm->zones) {
-		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
+	if (&bm->cur[index].zone->list != &bm->zones) {
+		bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
 					  struct rtree_node, list);
-		bm->cur.node_pfn = 0;
-		bm->cur.node_bit = 0;
+		bm->cur[index].node_pfn = 0;
+		bm->cur[index].node_bit = 0;
 		return true;
 	}
 
@@ -799,38 +846,29 @@
  *	It is required to run memory_bm_position_reset() before the
  *	first call to this function.
  */
-static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index)
 {
 	unsigned long bits, pfn, pages;
 	int bit;
 
+	index += NR_CPUS; /* Iteration state is separated from get/set/test */
+
 	do {
-		pages	  = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
-		bits      = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
-		bit	  = find_next_bit(bm->cur.node->data, bits,
-					  bm->cur.node_bit);
+		pages	  = bm->cur[index].zone->end_pfn - bm->cur[index].zone->start_pfn;
+		bits      = min(pages - bm->cur[index].node_pfn, BM_BITS_PER_BLOCK);
+		bit	  = find_next_bit(bm->cur[index].node->data, bits,
+					  bm->cur[index].node_bit);
 		if (bit < bits) {
-			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
-			bm->cur.node_bit = bit + 1;
+			pfn = bm->cur[index].zone->start_pfn + bm->cur[index].node_pfn + bit;
+			bm->cur[index].node_bit = bit + 1;
 			return pfn;
 		}
-	} while (rtree_next_node(bm));
+	} while (rtree_next_node(bm, index));
 
 	return BM_END_OF_MAP;
 }
 
-/**
- *	This structure represents a range of page frames the contents of which
- *	should not be saved during the suspend.
- */
-
-struct nosave_region {
-	struct list_head list;
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-};
-
-static LIST_HEAD(nosave_regions);
+LIST_HEAD(nosave_regions);
 
 /**
  *	register_nosave_region - register a range of page frames the contents
@@ -889,37 +927,37 @@
 void swsusp_set_page_free(struct page *page)
 {
 	if (free_pages_map)
-		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+		memory_bm_set_bit(free_pages_map, 0, page_to_pfn(page));
 }
 
 static int swsusp_page_is_free(struct page *page)
 {
 	return free_pages_map ?
-		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+		memory_bm_test_bit(free_pages_map, 0, page_to_pfn(page)) : 0;
 }
 
 void swsusp_unset_page_free(struct page *page)
 {
 	if (free_pages_map)
-		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+		memory_bm_clear_bit(free_pages_map, 0, page_to_pfn(page));
 }
 
 static void swsusp_set_page_forbidden(struct page *page)
 {
 	if (forbidden_pages_map)
-		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+		memory_bm_set_bit(forbidden_pages_map, 0, page_to_pfn(page));
 }
 
 int swsusp_page_is_forbidden(struct page *page)
 {
 	return forbidden_pages_map ?
-		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+		memory_bm_test_bit(forbidden_pages_map, 0, page_to_pfn(page)) : 0;
 }
 
 static void swsusp_unset_page_forbidden(struct page *page)
 {
 	if (forbidden_pages_map)
-		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+		memory_bm_clear_bit(forbidden_pages_map, 0, page_to_pfn(page));
 }
 
 /**
@@ -950,7 +988,7 @@
 				 * touch the PFNs for which the error is
 				 * returned anyway.
 				 */
-				mem_bm_set_bit_check(bm, pfn);
+				mem_bm_set_bit_check(bm, 0, pfn);
 			}
 	}
 }
@@ -1078,7 +1116,7 @@
  *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
  *	and it isn't a part of a free chunk of pages.
  */
-static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
+struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 {
 	struct page *page;
 
@@ -1125,11 +1163,6 @@
 	}
 	return n;
 }
-#else
-static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
-{
-	return NULL;
-}
 #endif /* CONFIG_HIGHMEM */
 
 /**
@@ -1140,7 +1173,7 @@
  *	of pages statically defined as 'unsaveable', and it isn't a part of
  *	a free chunk of pages.
  */
-static struct page *saveable_page(struct zone *zone, unsigned long pfn)
+struct page *saveable_page(struct zone *zone, unsigned long pfn)
 {
 	struct page *page;
 
@@ -1278,15 +1311,15 @@
 		max_zone_pfn = zone_end_pfn(zone);
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (page_is_saveable(zone, pfn))
-				memory_bm_set_bit(orig_bm, pfn);
+				memory_bm_set_bit(orig_bm, 0, pfn);
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
 	for(;;) {
-		pfn = memory_bm_next_pfn(orig_bm);
+		pfn = memory_bm_next_pfn(orig_bm, 0);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		copy_data_page(memory_bm_next_pfn(copy_bm, 0), pfn);
 	}
 }
 
@@ -1332,8 +1365,8 @@
 	memory_bm_position_reset(free_pages_map);
 
 loop:
-	fr_pfn = memory_bm_next_pfn(free_pages_map);
-	fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
+	fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
+	fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
 
 	/*
 	 * Find the next bit set in both bitmaps. This is guaranteed to
@@ -1341,16 +1374,16 @@
 	 */
 	do {
 		if (fb_pfn < fr_pfn)
-			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
+			fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
 		if (fr_pfn < fb_pfn)
-			fr_pfn = memory_bm_next_pfn(free_pages_map);
+			fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
 	} while (fb_pfn != fr_pfn);
 
 	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
 		struct page *page = pfn_to_page(fr_pfn);
 
-		memory_bm_clear_current(forbidden_pages_map);
-		memory_bm_clear_current(free_pages_map);
+		memory_bm_clear_current(forbidden_pages_map, 0);
+		memory_bm_clear_current(free_pages_map, 0);
 		__free_page(page);
 		goto loop;
 	}
@@ -1385,7 +1418,7 @@
 		page = alloc_image_page(mask);
 		if (!page)
 			break;
-		memory_bm_set_bit(&copy_bm, page_to_pfn(page));
+		memory_bm_set_bit(&copy_bm, 0, page_to_pfn(page));
 		if (PageHighMem(page))
 			alloc_highmem++;
 		else
@@ -1481,7 +1514,7 @@
 	memory_bm_position_reset(&copy_bm);
 
 	while (to_free_normal > 0 || to_free_highmem > 0) {
-		unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+		unsigned long pfn = memory_bm_next_pfn(&copy_bm, 0);
 		struct page *page = pfn_to_page(pfn);
 
 		if (PageHighMem(page)) {
@@ -1495,7 +1528,7 @@
 			to_free_normal--;
 			alloc_normal--;
 		}
-		memory_bm_clear_bit(&copy_bm, pfn);
+		memory_bm_clear_bit(&copy_bm, 0, pfn);
 		swsusp_unset_page_forbidden(page);
 		swsusp_unset_page_free(page);
 		__free_page(page);
@@ -1780,7 +1813,7 @@
 		struct page *page;
 
 		page = alloc_image_page(__GFP_HIGHMEM);
-		memory_bm_set_bit(bm, page_to_pfn(page));
+		memory_bm_set_bit(bm, 0, page_to_pfn(page));
 	}
 	return nr_highmem;
 }
@@ -1823,7 +1856,7 @@
 			page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
 			if (!page)
 				goto err_out;
-			memory_bm_set_bit(copy_bm, page_to_pfn(page));
+			memory_bm_set_bit(copy_bm, 0, page_to_pfn(page));
 		}
 	}
 
@@ -1838,6 +1871,9 @@
 {
 	unsigned int nr_pages, nr_highmem;
 
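+	/* When TuxOnIce drives the cycle, it performs its own atomic copy. */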
+	if (toi_running)
+		return toi_post_context_save();
+
 	printk(KERN_INFO "PM: Creating hibernation image:\n");
 
 	drain_local_pages(NULL);
@@ -1885,7 +1921,7 @@
 	return 0;
 }
 
-static char *check_image_kernel(struct swsusp_info *info)
+char *check_image_kernel(struct swsusp_info *info)
 {
 	if (info->version_code != LINUX_VERSION_CODE)
 		return "kernel version";
@@ -1906,7 +1942,7 @@
 	return nr_copy_pages + nr_meta_pages + 1;
 }
 
-static int init_header(struct swsusp_info *info)
+int init_header(struct swsusp_info *info)
 {
 	memset(info, 0, sizeof(struct swsusp_info));
 	info->num_physpages = get_num_physpages();
@@ -1928,7 +1964,7 @@
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
-		buf[j] = memory_bm_next_pfn(bm);
+		buf[j] = memory_bm_next_pfn(bm, 0);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 		/* Save page key for data page (s390 only). */
@@ -1979,7 +2015,7 @@
 	} else {
 		struct page *page;
 
-		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+		page = pfn_to_page(memory_bm_next_pfn(&copy_bm, 0));
 		if (PageHighMem(page)) {
 			/* Highmem pages are copied to the buffer,
 			 * because we can't return with a kmapped
@@ -2021,7 +2057,7 @@
 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
 	memory_bm_position_reset(bm);
 	do {
-		pfn = memory_bm_next_pfn(bm);
+		pfn = memory_bm_next_pfn(bm, 0);
 		if (likely(pfn != BM_END_OF_MAP)) {
 			if (likely(pfn_valid(pfn)))
 				swsusp_set_page_free(pfn_to_page(pfn));
@@ -2041,10 +2077,10 @@
 	unsigned long pfn;
 
 	memory_bm_position_reset(src);
-	pfn = memory_bm_next_pfn(src);
+	pfn = memory_bm_next_pfn(src, 0);
 	while (pfn != BM_END_OF_MAP) {
-		memory_bm_set_bit(dst, pfn);
-		pfn = memory_bm_next_pfn(src);
+		memory_bm_set_bit(dst, 0, pfn);
+		pfn = memory_bm_next_pfn(src, 0);
 	}
 }
 
@@ -2095,8 +2131,8 @@
 		/* Extract and buffer page key for data page (s390 only). */
 		page_key_memorize(buf + j);
 
-		if (memory_bm_pfn_present(bm, buf[j]))
-			memory_bm_set_bit(bm, buf[j]);
+		if (memory_bm_pfn_present(bm, 0, buf[j]))
+			memory_bm_set_bit(bm, 0, buf[j]);
 		else
 			return -EFAULT;
 	}
@@ -2139,12 +2175,12 @@
 	unsigned int cnt = 0;
 
 	memory_bm_position_reset(bm);
-	pfn = memory_bm_next_pfn(bm);
+	pfn = memory_bm_next_pfn(bm, 0);
 	while (pfn != BM_END_OF_MAP) {
 		if (PageHighMem(pfn_to_page(pfn)))
 			cnt++;
 
-		pfn = memory_bm_next_pfn(bm);
+		pfn = memory_bm_next_pfn(bm, 0);
 	}
 	return cnt;
 }
@@ -2189,7 +2225,7 @@
 		page = alloc_page(__GFP_HIGHMEM);
 		if (!swsusp_page_is_free(page)) {
 			/* The page is "safe", set its bit the bitmap */
-			memory_bm_set_bit(bm, page_to_pfn(page));
+			memory_bm_set_bit(bm, 0, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
@@ -2247,7 +2283,7 @@
 
 		/* Copy of the page will be stored in high memory */
 		kaddr = buffer;
-		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
+		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm, 0));
 		safe_highmem_pages--;
 		last_highmem_page = tmp;
 		pbe->copy_page = tmp;
@@ -2418,7 +2454,7 @@
 {
 	struct pbe *pbe;
 	struct page *page;
-	unsigned long pfn = memory_bm_next_pfn(bm);
+	unsigned long pfn = memory_bm_next_pfn(bm, 0);
 
 	if (pfn == BM_END_OF_MAP)
 		return ERR_PTR(-EFAULT);
@@ -2605,3 +2641,82 @@
 	return 0;
 }
 #endif /* CONFIG_HIGHMEM */
+
+struct memory_bitmap *pageset1_map, *pageset2_map, *free_map, *nosave_map,
+  *pageset1_copy_map, *io_map, *page_resave_map, *compare_map;
+
+int resume_attempted;
+
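+/*
+ * Stream every leaf page of the bitmap through the caller-supplied chunk
+ * writer, one PAGE_SIZE block per rtree leaf node; memory_bm_read() below
+ * walks the bitmap in the same order to load it back.
+ */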
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+	(int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+    int result;
+
+    memory_bm_position_reset(bm);
+
+    do {
+        result = rw_chunk(WRITE, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+        if (result)
+            return result;
+    } while (rtree_next_node(bm, 0));
+    return 0;
+}
+
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+	(int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+    int result;
+
+    memory_bm_position_reset(bm);
+
+    do {
+        result = rw_chunk(READ, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+        if (result)
+            return result;
+
+    } while (rtree_next_node(bm, 0));
+    return 0;
+}
+
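+/* Bytes needed to stream this bitmap: one PAGE_SIZE chunk per rtree leaf. */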
+int memory_bm_space_needed(struct memory_bitmap *bm)
+{
+    unsigned long bytes = 0;
+
+    memory_bm_position_reset(bm);
+    do {
+        bytes += PAGE_SIZE;
+    } while (rtree_next_node(bm, 0));
+    return bytes;
+}
+
+int toi_alloc_bitmap(struct memory_bitmap **bm)
+{
+    int error;
+    struct memory_bitmap *bm1;
+
+    bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+    if (!bm1)
+        return -ENOMEM;
+
+    error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+    if (error) {
+        printk(KERN_ERR "Error returned - %d.\n", error);
+        kfree(bm1);
+        return -ENOMEM;
+    }
+
+    *bm = bm1;
+    return 0;
+}
+
+void toi_free_bitmap(struct memory_bitmap **bm)
+{
+    if (!*bm)
+        return;
+
+    memory_bm_free(*bm, 0);
+    kfree(*bm);
+    *bm = NULL;
+}
diff -Nur linux-4.2/kernel/power/tuxonice.h linux-4.2-pck/kernel/power/tuxonice.h
--- linux-4.2/kernel/power/tuxonice.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice.h	2015-09-08 11:20:32.460345634 -0300
@@ -0,0 +1,260 @@
+/*
+ * kernel/power/tuxonice.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations used throughout swsusp.
+ *
+ */
+
+#ifndef KERNEL_POWER_TOI_H
+#define KERNEL_POWER_TOI_H
+
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/suspend.h>
+#include <linux/fs.h>
+#include <asm/setup.h>
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+#define TOI_CORE_VERSION "3.3"
+#define TOI_HEADER_VERSION 3
+#define MY_BOOT_KERNEL_DATA_VERSION 4
+
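+/*
+ * Data shared between the boot kernel and the kernel in the image. The
+ * version and size fields let the two sides cope with mismatched layouts:
+ * only the smaller of the two sizes is copied across at resume time.
+ */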
+struct toi_boot_kernel_data {
+        int version;
+        int size;
+        unsigned long toi_action;
+        unsigned long toi_debug_state;
+        u32 toi_default_console_level;
+        int toi_io_time[2][2];
+        char toi_nosave_commandline[COMMAND_LINE_SIZE];
+        unsigned long pages_used[33];
+        unsigned long incremental_bytes_in;
+        unsigned long incremental_bytes_out;
+        unsigned long compress_bytes_in;
+        unsigned long compress_bytes_out;
+        unsigned long pruned_pages;
+};
+
+extern struct toi_boot_kernel_data toi_bkd;
+
+/* Location of boot kernel data struct in kernel being resumed */
+extern unsigned long boot_kernel_data_buffer;
+
+/*                 == Action states ==                 */
+
+enum {
+        TOI_REBOOT,
+        TOI_PAUSE,
+        TOI_LOGALL,
+        TOI_CAN_CANCEL,
+        TOI_KEEP_IMAGE,
+        TOI_FREEZER_TEST,
+        TOI_SINGLESTEP,
+        TOI_PAUSE_NEAR_PAGESET_END,
+        TOI_TEST_FILTER_SPEED,
+        TOI_TEST_BIO,
+        TOI_NO_PAGESET2,
+        TOI_IGNORE_ROOTFS,
+        TOI_REPLACE_SWSUSP,
+        TOI_PAGESET2_FULL,
+        TOI_ABORT_ON_RESAVE_NEEDED,
+        TOI_NO_MULTITHREADED_IO,
+        TOI_NO_DIRECT_LOAD, /* Obsolete */
+        TOI_LATE_CPU_HOTPLUG, /* Obsolete */
+        TOI_GET_MAX_MEM_ALLOCD,
+        TOI_NO_FLUSHER_THREAD,
+        TOI_NO_PS2_IF_UNNEEDED,
+        TOI_POST_RESUME_BREAKPOINT,
+        TOI_NO_READAHEAD,
+        TOI_TRACE_DEBUG_ON,
+        TOI_INCREMENTAL_IMAGE,
+};
+
+extern unsigned long toi_bootflags_mask;
+
+#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
+
+/*                 == Result states ==                 */
+
+enum {
+        TOI_ABORTED,
+        TOI_ABORT_REQUESTED,
+        TOI_NOSTORAGE_AVAILABLE,
+        TOI_INSUFFICIENT_STORAGE,
+        TOI_FREEZING_FAILED,
+        TOI_KEPT_IMAGE,
+        TOI_WOULD_EAT_MEMORY,
+        TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
+        TOI_PM_SEM,
+        TOI_DEVICE_REFUSED,
+        TOI_SYSDEV_REFUSED,
+        TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
+        TOI_UNABLE_TO_PREPARE_IMAGE,
+        TOI_FAILED_MODULE_INIT,
+        TOI_FAILED_MODULE_CLEANUP,
+        TOI_FAILED_IO,
+        TOI_OUT_OF_MEMORY,
+        TOI_IMAGE_ERROR,
+        TOI_PLATFORM_PREP_FAILED,
+        TOI_CPU_HOTPLUG_FAILED,
+        TOI_ARCH_PREPARE_FAILED, /* Removed Linux-3.0 */
+        TOI_RESAVE_NEEDED,
+        TOI_CANT_SUSPEND,
+        TOI_NOTIFIERS_PREPARE_FAILED,
+        TOI_PRE_SNAPSHOT_FAILED,
+        TOI_PRE_RESTORE_FAILED,
+        TOI_USERMODE_HELPERS_ERR,
+        TOI_CANT_USE_ALT_RESUME,
+        TOI_HEADER_TOO_BIG,
+        TOI_WAKEUP_EVENT,
+        TOI_SYSCORE_REFUSED,
+        TOI_DPM_PREPARE_FAILED,
+        TOI_DPM_SUSPEND_FAILED,
+        TOI_NUM_RESULT_STATES        /* Used in printing debug info only */
+};
+
+extern unsigned long toi_result;
+
+#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
+#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
+                                test_and_set_bit(bit, &toi_result))
+#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
+#define test_result_state(bit) (test_bit(bit, &toi_result))
+
+/*         == Debug sections and levels ==         */
+
+/* debugging levels. */
+enum {
+        TOI_STATUS = 0,
+        TOI_ERROR = 2,
+        TOI_LOW,
+        TOI_MEDIUM,
+        TOI_HIGH,
+        TOI_VERBOSE,
+};
+
+enum {
+        TOI_ANY_SECTION,
+        TOI_EAT_MEMORY,
+        TOI_IO,
+        TOI_HEADER,
+        TOI_WRITER,
+        TOI_MEMORY,
+        TOI_PAGEDIR,
+        TOI_COMPRESS,
+        TOI_BIO,
+};
+
+#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
+#define clear_debug_state(bit) \
+        (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
+#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
+
+/*                == Steps in hibernating ==        */
+
+enum {
+        STEP_HIBERNATE_PREPARE_IMAGE,
+        STEP_HIBERNATE_SAVE_IMAGE,
+        STEP_HIBERNATE_POWERDOWN,
+        STEP_RESUME_CAN_RESUME,
+        STEP_RESUME_LOAD_PS1,
+        STEP_RESUME_DO_RESTORE,
+        STEP_RESUME_READ_PS2,
+        STEP_RESUME_GO,
+        STEP_RESUME_ALT_IMAGE,
+        STEP_CLEANUP,
+        STEP_QUIET_CLEANUP
+};
+
+/*                == TuxOnIce states ==
+        (see also include/linux/suspend.h)        */
+
+#define get_toi_state()  (toi_state)
+#define restore_toi_state(saved_state) \
+        do { toi_state = saved_state; } while (0)
+
+/*                == Module support ==                */
+
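+/*
+ * Function pointers through which the built-in hooks (see swsusp_save(),
+ * hibernate() and software_resume()) reach the TuxOnIce core; the core is
+ * expected to publish its implementation via the toi_core_fns pointer below.
+ */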
+struct toi_core_fns {
+        int (*post_context_save)(void);
+        unsigned long (*get_nonconflicting_page)(void);
+        int (*try_hibernate)(void);
+        void (*try_resume)(void);
+};
+
+extern struct toi_core_fns *toi_core_fns;
+
+/*                == All else ==                        */
+#define KB(x) ((x) << (PAGE_SHIFT - 10))
+#define MB(x) ((x) >> (20 - PAGE_SHIFT))
+
+extern int toi_start_anything(int toi_or_resume);
+extern void toi_finish_anything(int toi_or_resume);
+
+extern int save_image_part1(void);
+extern int toi_atomic_restore(void);
+
+extern int toi_try_hibernate(void);
+extern void toi_try_resume(void);
+
+extern int __toi_post_context_save(void);
+
+extern unsigned int nr_hibernates;
+extern char alt_resume_param[256];
+
+extern void copyback_post(void);
+extern int toi_hibernate(void);
+extern unsigned long extra_pd1_pages_used;
+
+#define SECTOR_SIZE 512
+
+extern void toi_early_boot_message(int can_erase_image, int default_answer,
+        char *warning_reason, ...);
+
+extern int do_check_can_resume(void);
+extern int do_toi_step(int step);
+extern int toi_launch_userspace_program(char *command, int channel_no,
+                int wait, int debug);
+
+extern char tuxonice_signature[9];
+
+extern int toi_start_other_threads(void);
+extern void toi_stop_other_threads(void);
+
+extern int toi_trace_index;
+#define TOI_TRACE_DEBUG(PFN, DESC, ...) \
+    do { \
+        if (test_action_state(TOI_TRACE_DEBUG_ON)) { \
+            printk("*TOI* %ld %02d" DESC "\n", PFN, toi_trace_index, ##__VA_ARGS__); \
+        } \
+    } while(0)
+
+#ifdef CONFIG_TOI_KEEP_IMAGE
+#define toi_keeping_image (test_action_state(TOI_KEEP_IMAGE) || test_action_state(TOI_INCREMENTAL_IMAGE))
+#else
+#define toi_keeping_image (0)
+#endif
+
+#ifdef CONFIG_TOI_INCREMENTAL
+extern void toi_reset_dirtiness_one(unsigned long pfn, int verbose);
+extern int toi_reset_dirtiness(int verbose);
+extern void toi_cbw_write(void);
+extern void toi_cbw_restore(void);
+extern int toi_allocate_cbw_data(void);
+extern void toi_free_cbw_data(void);
+extern int toi_cbw_init(void);
+extern void toi_mark_tasks_cbw(void);
+#else
+static inline int toi_reset_dirtiness(int verbose) { return 0; }
+#define toi_cbw_write() do { } while(0)
+#define toi_cbw_restore() do { } while(0)
+#define toi_allocate_cbw_data() do { } while(0)
+#define toi_free_cbw_data() do { } while(0)
+static inline int toi_cbw_init(void) { return 0; }
+#endif
+#endif
diff -Nur linux-4.2/kernel/power/tuxonice_alloc.c linux-4.2-pck/kernel/power/tuxonice_alloc.c
--- linux-4.2/kernel/power/tuxonice_alloc.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_alloc.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,308 @@
+/*
+ * kernel/power/tuxonice_alloc.c
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+#define TOI_ALLOC_PATHS 41
+
+static DEFINE_MUTEX(toi_alloc_mutex);
+
+static struct toi_module_ops toi_alloc_ops;
+
+static int toi_fail_num;
+
+static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
+                toi_free_count[TOI_ALLOC_PATHS],
+                toi_test_count[TOI_ALLOC_PATHS],
+                toi_fail_count[TOI_ALLOC_PATHS];
+static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
+static int cur_allocd, max_allocd;
+
+static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
+        "", /* 0 */
+        "get_io_info_struct",
+        "extent",
+        "extent (loading chain)",
+        "userui channel",
+        "userui arg", /* 5 */
+        "attention list metadata",
+        "extra pagedir memory metadata",
+        "bdev metadata",
+        "extra pagedir memory",
+        "header_locations_read", /* 10 */
+        "bio queue",
+        "prepare_readahead",
+        "i/o buffer",
+        "writer buffer in bio_init",
+        "checksum buffer", /* 15 */
+        "compression buffer",
+        "filewriter signature op",
+        "set resume param alloc1",
+        "set resume param alloc2",
+        "debugging info buffer", /* 20 */
+        "check can resume buffer",
+        "write module config buffer",
+        "read module config buffer",
+        "write image header buffer",
+        "read pageset1 buffer", /* 25 */
+        "get_have_image_data buffer",
+        "checksum page",
+        "worker rw loop",
+        "get nonconflicting page",
+        "ps1 load addresses", /* 30 */
+        "remove swap image",
+        "swap image exists",
+        "swap parse sig location",
+        "sysfs kobj",
+        "swap mark resume attempted buffer", /* 35 */
+        "cluster member",
+        "boot kernel data buffer",
+        "setting swap signature",
+        "block i/o bdev struct",
+        "copy before write", /* 40 */
+};
+
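+/*
+ * Allocation fault injection: when the path number matches the value written
+ * to the "failure_test" sysfs entry (toi_fail_num), skip the real allocation,
+ * fail it once and count the event in toi_test_count[].
+ */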
+#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
+        do { \
+                BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
+                \
+                if (FAIL_NUM == toi_fail_num) { \
+                        atomic_inc(&toi_test_count[FAIL_NUM]); \
+                        toi_fail_num = 0; \
+                        return FAIL_VAL; \
+                } \
+        } while (0)
+
+static void alloc_update_stats(int fail_num, void *result, int size)
+{
+        if (!result) {
+                atomic_inc(&toi_fail_count[fail_num]);
+                return;
+        }
+
+        atomic_inc(&toi_alloc_count[fail_num]);
+        if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+                mutex_lock(&toi_alloc_mutex);
+                toi_cur_allocd[fail_num]++;
+                cur_allocd += size;
+                if (unlikely(cur_allocd > max_allocd)) {
+                        int i;
+
+                        for (i = 0; i < TOI_ALLOC_PATHS; i++)
+                                toi_max_allocd[i] = toi_cur_allocd[i];
+                        max_allocd = cur_allocd;
+                }
+                mutex_unlock(&toi_alloc_mutex);
+        }
+}
+
+static void free_update_stats(int fail_num, int size)
+{
+        BUG_ON(fail_num >= TOI_ALLOC_PATHS);
+        atomic_inc(&toi_free_count[fail_num]);
+        if (unlikely(atomic_read(&toi_free_count[fail_num]) >
+                                atomic_read(&toi_alloc_count[fail_num])))
+                dump_stack();
+        if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+                mutex_lock(&toi_alloc_mutex);
+                cur_allocd -= size;
+                toi_cur_allocd[fail_num]--;
+                mutex_unlock(&toi_alloc_mutex);
+        }
+}
+
+void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
+{
+        void *result;
+
+        if (toi_alloc_ops.enabled)
+                MIGHT_FAIL(fail_num, NULL);
+        result = kzalloc(size, flags);
+        if (toi_alloc_ops.enabled)
+                alloc_update_stats(fail_num, result, size);
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        return result;
+}
+
+unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+                unsigned int order)
+{
+        unsigned long result;
+
+        mask |= ___GFP_TOI_NOTRACK;
+        if (toi_alloc_ops.enabled)
+                MIGHT_FAIL(fail_num, 0);
+        result = __get_free_pages(mask, order);
+        if (toi_alloc_ops.enabled)
+                alloc_update_stats(fail_num, (void *) result,
+                                PAGE_SIZE << order);
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        return result;
+}
+
+struct page *toi_alloc_page(int fail_num, gfp_t mask)
+{
+        struct page *result;
+
+        if (toi_alloc_ops.enabled)
+                MIGHT_FAIL(fail_num, NULL);
+        mask |= ___GFP_TOI_NOTRACK;
+        result = alloc_page(mask);
+        if (toi_alloc_ops.enabled)
+                alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        return result;
+}
+
+unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
+{
+        unsigned long result;
+
+        if (toi_alloc_ops.enabled)
+                MIGHT_FAIL(fail_num, 0);
+        mask |= ___GFP_TOI_NOTRACK;
+        result = get_zeroed_page(mask);
+        if (toi_alloc_ops.enabled)
+                alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        return result;
+}
+
+void toi_kfree(int fail_num, const void *arg, int size)
+{
+        if (arg && toi_alloc_ops.enabled)
+                free_update_stats(fail_num, size);
+
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        kfree(arg);
+}
+
+void toi_free_page(int fail_num, unsigned long virt)
+{
+        if (virt && toi_alloc_ops.enabled)
+                free_update_stats(fail_num, PAGE_SIZE);
+
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        free_page(virt);
+}
+
+void toi__free_page(int fail_num, struct page *page)
+{
+        if (page && toi_alloc_ops.enabled)
+                free_update_stats(fail_num, PAGE_SIZE);
+
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        __free_page(page);
+}
+
+void toi_free_pages(int fail_num, struct page *page, int order)
+{
+        if (page && toi_alloc_ops.enabled)
+                free_update_stats(fail_num, PAGE_SIZE << order);
+
+        if (fail_num == toi_trace_allocs)
+                dump_stack();
+        __free_pages(page, order);
+}
+
+void toi_alloc_print_debug_stats(void)
+{
+        int i, header_done = 0;
+
+        if (!toi_alloc_ops.enabled)
+                return;
+
+        for (i = 0; i < TOI_ALLOC_PATHS; i++)
+                if (atomic_read(&toi_alloc_count[i]) !=
+                    atomic_read(&toi_free_count[i])) {
+                        if (!header_done) {
+                                printk(KERN_INFO "Idx  Allocs   Frees   Tests "
+                                        "  Fails     Max Description\n");
+                                header_done = 1;
+                        }
+
+                        printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
+                                atomic_read(&toi_alloc_count[i]),
+                                atomic_read(&toi_free_count[i]),
+                                atomic_read(&toi_test_count[i]),
+                                atomic_read(&toi_fail_count[i]),
+                                toi_max_allocd[i],
+                                toi_alloc_desc[i]);
+                }
+}
+
+static int toi_alloc_initialise(int starting_cycle)
+{
+        int i;
+
+        if (!starting_cycle)
+                return 0;
+
+        if (toi_trace_allocs)
+                dump_stack();
+
+        for (i = 0; i < TOI_ALLOC_PATHS; i++) {
+                atomic_set(&toi_alloc_count[i], 0);
+                atomic_set(&toi_free_count[i], 0);
+                atomic_set(&toi_test_count[i], 0);
+                atomic_set(&toi_fail_count[i], 0);
+                toi_cur_allocd[i] = 0;
+                toi_max_allocd[i] = 0;
+        }
+
+        max_allocd = 0;
+        cur_allocd = 0;
+        return 0;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
+        SYSFS_INT("trace", SYSFS_RW, &toi_trace_allocs, 0, TOI_ALLOC_PATHS, 0,
+                        NULL),
+        SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_GET_MAX_MEM_ALLOCD, 0),
+        SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
+                        NULL)
+};
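+
+/*
+ * A rough sketch of how these knobs are meant to be driven from userspace,
+ * assuming this module's directory shows up as /sys/power/tuxonice/alloc:
+ *
+ *   echo 1  > /sys/power/tuxonice/alloc/enabled       # turn accounting on
+ *   echo 13 > /sys/power/tuxonice/alloc/failure_test  # fail the next "i/o buffer" alloc
+ */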
+
+static struct toi_module_ops toi_alloc_ops = {
+        .type                                        = MISC_HIDDEN_MODULE,
+        .name                                        = "allocation debugging",
+        .directory                                = "alloc",
+        .module                                        = THIS_MODULE,
+        .early                                        = 1,
+        .initialise                                = toi_alloc_initialise,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+int toi_alloc_init(void)
+{
+        int result = toi_register_module(&toi_alloc_ops);
+        return result;
+}
+
+void toi_alloc_exit(void)
+{
+        toi_unregister_module(&toi_alloc_ops);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_alloc.h linux-4.2-pck/kernel/power/tuxonice_alloc.h
--- linux-4.2/kernel/power/tuxonice_alloc.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_alloc.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,54 @@
+/*
+ * kernel/power/tuxonice_alloc.h
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/slab.h>
+#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN)
+#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
+
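+/*
+ * With CONFIG_PM_DEBUG the wrappers below account every allocation against a
+ * per-call-site index (and can inject failures); without it they collapse
+ * into the plain allocator calls.
+ */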
+#ifdef CONFIG_PM_DEBUG
+extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
+extern void toi_kfree(int fail_num, const void *arg, int size);
+
+extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+                unsigned int order);
+#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
+extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
+extern void toi_free_page(int fail_num, unsigned long buf);
+extern void toi__free_page(int fail_num, struct page *page);
+extern void toi_free_pages(int fail_num, struct page *page, int order);
+extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
+extern int toi_alloc_init(void);
+extern void toi_alloc_exit(void);
+
+extern void toi_alloc_print_debug_stats(void);
+
+#else /* CONFIG_PM_DEBUG */
+
+#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
+#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN))
+
+#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
+#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
+#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
+#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
+#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
+#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
+#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
+static inline int toi_alloc_init(void)
+{
+        return 0;
+}
+
+static inline void toi_alloc_exit(void) { }
+
+static inline void toi_alloc_print_debug_stats(void) { }
+
+#endif
+
+extern int toi_trace_allocs;
diff -Nur linux-4.2/kernel/power/tuxonice_atomic_copy.c linux-4.2-pck/kernel/power/tuxonice_atomic_copy.c
--- linux-4.2/kernel/power/tuxonice_atomic_copy.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_atomic_copy.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,469 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.c
+ *
+ * Copyright 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/syscore_ops.h>
+#include <linux/ftrace.h>
+#include <asm/suspend.h>
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+
+unsigned long extra_pd1_pages_used;
+
+/**
+ * free_pbe_list - free page backup entries used by the atomic copy code.
+ * @list:        List to free.
+ * @highmem:        Whether the list is in highmem.
+ *
+ * Normally, this function isn't used. If, however, we need to abort before
+ * doing the atomic copy, we use this to free the pbes previously allocated.
+ **/
+static void free_pbe_list(struct pbe **list, int highmem)
+{
+        while (*list) {
+                int i;
+                struct pbe *free_pbe, *next_page = NULL;
+                struct page *page;
+
+                if (highmem) {
+                        page = (struct page *) *list;
+                        free_pbe = (struct pbe *) kmap(page);
+                } else {
+                        page = virt_to_page(*list);
+                        free_pbe = *list;
+                }
+
+                for (i = 0; i < PBES_PER_PAGE; i++) {
+                        if (!free_pbe)
+                                break;
+                        if (highmem)
+                                toi__free_page(29, free_pbe->address);
+                        else
+                                toi_free_page(29,
+                                        (unsigned long) free_pbe->address);
+                        free_pbe = free_pbe->next;
+                }
+
+                if (highmem) {
+                        if (free_pbe)
+                                next_page = free_pbe;
+                        kunmap(page);
+                } else {
+                        if (free_pbe)
+                                next_page = free_pbe;
+                }
+
+                toi__free_page(29, page);
+                *list = (struct pbe *) next_page;
+        }
+}
+
+/**
+ * copyback_post - post atomic-restore actions
+ *
+ * After doing the atomic restore, we have a few more things to do:
+ *        1) We want to retain some values across the restore, so we now copy
+ *        these from the nosave variables to the normal ones.
+ *        2) Set the status flags.
+ *        3) Resume devices.
+ *        4) Tell userui so it can redraw & restore settings.
+ *        5) Reread the page cache.
+ **/
+void copyback_post(void)
+{
+        struct toi_boot_kernel_data *bkd =
+                (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+        if (toi_activate_storage(1))
+                panic("Failed to reactivate our storage.");
+
+        toi_post_atomic_restore_modules(bkd);
+
+        toi_cond_pause(1, "About to reload secondary pagedir.");
+
+        if (read_pageset2(0))
+                panic("Unable to successfully reread the page cache.");
+
+        /*
+         * If the user wants to sleep again after resuming from full-off,
+         * it's most likely to be in order to suspend to ram, so we'll
+         * do this check after loading pageset2, to give them the fastest
+         * wakeup when they are ready to use the computer again.
+         */
+        toi_check_resleep();
+
+        if (test_action_state(TOI_INCREMENTAL_IMAGE))
+            toi_reset_dirtiness(1);
+}
+
+/**
+ * toi_copy_pageset1 - do the atomic copy of pageset1
+ *
+ * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
+ * because we can't be sure what side effects it has. On my old Duron, with
+ * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
+ * count at resume time 4 instead of 3.
+ *
+ * We don't want to call kmap_atomic unconditionally because it has the side
+ * effect of incrementing the preempt count, which will leave it one too high
+ * post resume (the page containing the preempt count will be copied after
+ * it's incremented). This is essentially the same problem.
+ **/
+void toi_copy_pageset1(void)
+{
+        int i;
+        unsigned long source_index, dest_index;
+
+        memory_bm_position_reset(pageset1_map);
+        memory_bm_position_reset(pageset1_copy_map);
+
+        source_index = memory_bm_next_pfn(pageset1_map, 0);
+        dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+
+        for (i = 0; i < pagedir1.size; i++) {
+                unsigned long *origvirt, *copyvirt;
+                struct page *origpage, *copypage;
+                int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
+                    was_present1, was_present2;
+
+                origpage = pfn_to_page(source_index);
+                copypage = pfn_to_page(dest_index);
+
+                origvirt = PageHighMem(origpage) ?
+                        kmap_atomic(origpage) :
+                        page_address(origpage);
+
+                copyvirt = PageHighMem(copypage) ?
+                        kmap_atomic(copypage) :
+                        page_address(copypage);
+
+                was_present1 = kernel_page_present(origpage);
+                if (!was_present1)
+                        kernel_map_pages(origpage, 1, 1);
+
+                was_present2 = kernel_page_present(copypage);
+                if (!was_present2)
+                        kernel_map_pages(copypage, 1, 1);
+
+                while (loop >= 0) {
+                        *(copyvirt + loop) = *(origvirt + loop);
+                        loop--;
+                }
+
+                if (!was_present1)
+                        kernel_map_pages(origpage, 1, 0);
+
+                if (!was_present2)
+                        kernel_map_pages(copypage, 1, 0);
+
+                if (PageHighMem(origpage))
+                        kunmap_atomic(origvirt);
+
+                if (PageHighMem(copypage))
+                        kunmap_atomic(copyvirt);
+
+                source_index = memory_bm_next_pfn(pageset1_map, 0);
+                dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+        }
+}
+
+/**
+ * __toi_post_context_save - steps after saving the cpu context
+ *
+ * Steps taken after saving the CPU state to make the actual
+ * atomic copy.
+ *
+ * Called from swsusp_save in snapshot.c via toi_post_context_save.
+ **/
+int __toi_post_context_save(void)
+{
+        unsigned long old_ps1_size = pagedir1.size;
+
+        check_checksums();
+
+        free_checksum_pages();
+
+        toi_recalculate_image_contents(1);
+
+        extra_pd1_pages_used = pagedir1.size > old_ps1_size ?
+                pagedir1.size - old_ps1_size : 0;
+
+        if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
+                printk(KERN_INFO "Pageset1 has grown by %lu pages. "
+                        "extra_pages_allowance is currently only %lu.\n",
+                        pagedir1.size - old_ps1_size,
+                        extra_pd1_pages_allowance);
+
+                /*
+                 * Highlevel code will see this, clear the state and
+                 * retry if we haven't already done so twice.
+                 */
+                if (any_to_free(1)) {
+                        set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+                        return 1;
+                }
+                if (try_allocate_extra_memory()) {
+                        printk(KERN_INFO "Failed to allocate the extra memory"
+                                        " needed. Restarting the process.\n");
+                        set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+                        return 1;
+                }
+                printk(KERN_INFO "However, it looks like there's enough"
+                        " free RAM and storage to handle this, so"
+                        " continuing anyway.\n");
+                /* 
+                 * What if try_allocate_extra_memory above calls
+                 * toi_allocate_extra_pagedir_memory and it allocs a new
+                 * slab page via toi_kzalloc which should be in ps1? So...
+                 */
+                toi_recalculate_image_contents(1);
+        }
+
+        if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
+            !test_action_state(TOI_TEST_BIO))
+                toi_copy_pageset1();
+
+        return 0;
+}
+
+/**
+ * toi_hibernate - high level code for doing the atomic copy
+ *
+ * High-level code which prepares to do the atomic copy. Loosely based
+ * on the swsusp version, but with the following twists:
+ *        - We set toi_running so the swsusp code uses our code paths.
+ *        - We give better feedback regarding what goes wrong if there is a
+ *          problem.
+ *        - We use an extra function to call the assembly, just in case this code
+ *          is in a module (return address).
+ **/
+int toi_hibernate(void)
+{
+        int error;
+
+        error = toi_lowlevel_builtin();
+
+        if (!error) {
+                struct toi_boot_kernel_data *bkd =
+                        (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+                /*
+                 * The boot kernel's data may be larger (newer version) or
+                 * smaller (older version) than ours. Copy the minimum
+                 * of the two sizes, so that we don't overwrite valid values
+                 * from pre-atomic copy.
+                 */
+
+                memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
+                        min_t(int, sizeof(struct toi_boot_kernel_data),
+                                bkd->size));
+        }
+
+        return error;
+}
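The comment in toi_hibernate() above relies on the structure carrying its own size in a leading field, so two kernels built from different TuxOnIce versions can exchange it safely: the reader copies only min(writer's size, its own size) bytes and keeps defaults for any newer fields. A minimal userspace sketch of that convention, with invented struct and field names:

    #include <stdio.h>
    #include <string.h>

    struct bkd_v1 { unsigned int size; unsigned long a; };        /* "older" layout */
    struct bkd_v2 { unsigned int size; unsigned long a, b; };     /* "newer" layout */

    int main(void)
    {
            struct bkd_v1 older = { sizeof(older), 42 };  /* data left by the boot kernel */
            struct bkd_v2 ours = { sizeof(ours), 0, 7 };  /* our defaults */
            size_t n = older.size < sizeof(ours) ? older.size : sizeof(ours);

            memcpy(&ours, &older, n);         /* copy only the common prefix */
            ours.size = sizeof(ours);         /* keep our own notion of the size */

            printf("a=%lu b=%lu\n", ours.a, ours.b);  /* a copied, b preserved */
            return 0;
    }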
+
+/**
+ * toi_atomic_restore - prepare to do the atomic restore
+ *
+ * Get ready to do the atomic restore. This part gets us into the same
+ * state we were in prior to calling do_toi_lowlevel while
+ * hibernating: secondary cpus hot-unplugged and processes frozen,
+ * before starting the thread that will do the restore.
+ **/
+int toi_atomic_restore(void)
+{
+        int error;
+
+        toi_prepare_status(DONT_CLEAR_BAR,        "Atomic restore.");
+
+        memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
+                strlen(saved_command_line));
+
+        toi_pre_atomic_restore_modules(&toi_bkd);
+
+        if (add_boot_kernel_data_pbe())
+                goto Failed;
+
+        toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+        if (toi_go_atomic(PMSG_QUIESCE, 0))
+                goto Failed;
+
+        /* We'll ignore saved state, but this gets preempt count (etc) right */
+        save_processor_state();
+
+        error = swsusp_arch_resume();
+        /*
+         * The code below is only ever reached in case of failure. Otherwise,
+         * execution continues at the place where swsusp_arch_suspend was called.
+         *
+         * We don't know whether it's safe to continue (this shouldn't happen),
+         * so let's err on the side of caution.
+         */
+        BUG();
+
+Failed:
+        free_pbe_list(&restore_pblist, 0);
+#ifdef CONFIG_HIGHMEM
+        free_pbe_list(&restore_highmem_pblist, 1);
+#endif
+        return 1;
+}
+
+/**
+ * toi_go_atomic - do the actual atomic copy/restore
+ * @state:           The state to use for dpm_suspend_start & power_down calls.
+ * @suspend_time:  Whether we're suspending or resuming.
+ **/
+int toi_go_atomic(pm_message_t state, int suspend_time)
+{
+        if (suspend_time) {
+                if (platform_begin(1)) {
+                        set_abort_result(TOI_PLATFORM_PREP_FAILED);
+                        toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+                        return 1;
+                }
+
+                if (dpm_prepare(PMSG_FREEZE)) {
+                        set_abort_result(TOI_DPM_PREPARE_FAILED);
+                        dpm_complete(PMSG_RECOVER);
+                        toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+                        return 1;
+                }
+        }
+
+        suspend_console();
+        pm_restrict_gfp_mask();
+
+        if (suspend_time) {
+                if (dpm_suspend(state)) {
+                        set_abort_result(TOI_DPM_SUSPEND_FAILED);
+                        toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+                        return 1;
+                }
+        } else {
+                if (dpm_suspend_start(state)) {
+                        set_abort_result(TOI_DPM_SUSPEND_FAILED);
+                        toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+                        return 1;
+                }
+        }
+
+        /* At this point, dpm_suspend_start() has been called, but *not*
+         * dpm_suspend_end() (the old dpm_suspend_noirq()). We *must* call
+         * dpm_suspend_end() now. Otherwise, drivers for some devices
+         * (e.g. interrupt controllers) become desynchronized with the
+         * actual state of the hardware at resume time, and evil weirdness
+         * ensues.
+         */
+
+        if (dpm_suspend_end(state)) {
+                set_abort_result(TOI_DEVICE_REFUSED);
+                toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
+                return 1;
+        }
+
+        if (suspend_time) {
+                if (platform_pre_snapshot(1))
+                        set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
+        } else {
+                if (platform_pre_restore(1))
+                        set_abort_result(TOI_PRE_RESTORE_FAILED);
+        }
+
+        if (test_result_state(TOI_ABORTED)) {
+                toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
+                return 1;
+        }
+
+        if (disable_nonboot_cpus()) {
+                set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+                toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
+                                suspend_time, 1);
+                return 1;
+        }
+
+        local_irq_disable();
+
+        if (syscore_suspend()) {
+                set_abort_result(TOI_SYSCORE_REFUSED);
+                toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1);
+                return 1;
+        }
+
+        if (suspend_time && pm_wakeup_pending()) {
+                set_abort_result(TOI_WAKEUP_EVENT);
+                toi_end_atomic(ATOMIC_STEP_SYSCORE_RESUME, suspend_time, 1);
+                return 1;
+        }
+        return 0;
+}
+
+/**
+ * toi_end_atomic - post atomic copy/restore routines
+ * @stage:                What step to start at.
+ * @suspend_time:        Whether we're suspending or resuming.
+ * @error:                Whether we're recovering from an error.
+ **/
+void toi_end_atomic(int stage, int suspend_time, int error)
+{
+        pm_message_t msg = suspend_time ? (error ? PMSG_RECOVER : PMSG_THAW) :
+                PMSG_RESTORE;
+
+        switch (stage) {
+        case ATOMIC_ALL_STEPS:
+                if (!suspend_time) {
+                        events_check_enabled = false;
+                }
+                platform_leave(1);
+        case ATOMIC_STEP_SYSCORE_RESUME:
+                syscore_resume();
+        case ATOMIC_STEP_IRQS:
+                local_irq_enable();
+        case ATOMIC_STEP_CPU_HOTPLUG:
+                enable_nonboot_cpus();
+        case ATOMIC_STEP_PLATFORM_FINISH:
+                if (!suspend_time && error & 2)
+                        platform_restore_cleanup(1);
+                else 
+                        platform_finish(1);
+                dpm_resume_start(msg);
+        case ATOMIC_STEP_DEVICE_RESUME:
+                if (suspend_time && (error & 2))
+                        platform_recover(1);
+                dpm_resume(msg);
+                if (!toi_in_suspend()) {
+                    dpm_resume_end(PMSG_RECOVER);
+                }
+                if (error || !toi_in_suspend()) {
+                        pm_restore_gfp_mask();
+                }
+                resume_console();
+        case ATOMIC_STEP_DPM_COMPLETE:
+                dpm_complete(msg);
+        case ATOMIC_STEP_PLATFORM_END:
+                platform_end(1);
+
+                toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
+        }
+}
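toi_end_atomic() is written as a fall-through switch so a caller names the first undo step and every later step runs as well, mirroring in reverse the order in which toi_go_atomic() set things up. A stripped-down userspace sketch of the staged-unwind pattern (the stage names are invented for the example):

    #include <stdio.h>

    enum { ALL_STEPS, STEP_C, STEP_B, STEP_A };

    static void teardown(int stage)
    {
            switch (stage) {
            case ALL_STEPS:
                    printf("undo step C\n");
                    /* fall through */
            case STEP_C:
                    printf("undo step B\n");
                    /* fall through */
            case STEP_B:
                    printf("undo step A\n");
                    /* fall through */
            case STEP_A:
                    printf("final cleanup\n");
            }
    }

    int main(void)
    {
            teardown(STEP_B);   /* prints only "undo step A" and "final cleanup" */
            return 0;
    }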
diff -Nur linux-4.2/kernel/power/tuxonice_atomic_copy.h linux-4.2-pck/kernel/power/tuxonice_atomic_copy.h
--- linux-4.2/kernel/power/tuxonice_atomic_copy.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_atomic_copy.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,25 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.h
+ *
+ * Copyright 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+enum {
+        ATOMIC_ALL_STEPS,
+        ATOMIC_STEP_SYSCORE_RESUME,
+        ATOMIC_STEP_IRQS,
+        ATOMIC_STEP_CPU_HOTPLUG,
+        ATOMIC_STEP_PLATFORM_FINISH,
+        ATOMIC_STEP_DEVICE_RESUME,
+        ATOMIC_STEP_DPM_COMPLETE,
+        ATOMIC_STEP_PLATFORM_END,
+};
+
+int toi_go_atomic(pm_message_t state, int toi_time);
+void toi_end_atomic(int stage, int toi_time, int error);
+
+extern void platform_recover(int platform_mode);
diff -Nur linux-4.2/kernel/power/tuxonice_bio.h linux-4.2-pck/kernel/power/tuxonice_bio.h
--- linux-4.2/kernel/power/tuxonice_bio.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_bio.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,78 @@
+/*
+ * kernel/power/tuxonice_bio.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+#include <linux/buffer_head.h>
+#include "tuxonice_extent.h"
+
+void toi_put_extent_chain(struct hibernate_extent_chain *chain);
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+                unsigned long start, unsigned long end);
+
+struct hibernate_extent_saved_state {
+        int extent_num;
+        struct hibernate_extent *extent_ptr;
+        unsigned long offset;
+};
+
+struct toi_bdev_info {
+        struct toi_bdev_info *next;
+        struct hibernate_extent_chain blocks;
+        struct block_device *bdev;
+        struct toi_module_ops *allocator;
+        int allocator_index;
+        struct hibernate_extent_chain allocations;
+        char name[266]; /* "swap on " or "file " + up to 256 chars */
+
+        /* Saved in header */
+        char uuid[17];
+        dev_t dev_t;
+        int prio;
+        int bmap_shift;
+        int blocks_per_page;
+        unsigned long pages_used;
+        struct hibernate_extent_saved_state saved_state[4];
+};
+
+struct toi_extent_iterate_state {
+        struct toi_bdev_info *current_chain;
+        int num_chains;
+        int saved_chain_number[4];
+        struct toi_bdev_info *saved_chain_ptr[4];
+};
+
+/*
+ * Our exported interface so the swapwriter and filewriter don't
+ * need these functions duplicated.
+ */
+struct toi_bio_ops {
+        int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
+                        struct page *page);
+        int (*register_storage)(struct toi_bdev_info *new);
+        void (*free_storage)(void);
+};
+
+struct toi_allocator_ops {
+        unsigned long (*toi_swap_storage_available) (void);
+};
+
+extern struct toi_bio_ops toi_bio_ops;
+
+extern char *toi_writer_buffer;
+extern int toi_writer_buffer_posn;
+
+struct toi_bio_allocator_ops {
+        int (*register_storage) (void);
+        unsigned long (*storage_available)(void);
+        int (*allocate_storage) (struct toi_bdev_info *, unsigned long);
+        int (*bmap) (struct toi_bdev_info *);
+        void (*free_storage) (struct toi_bdev_info *);
+        unsigned long (*free_unused_storage) (struct toi_bdev_info *, unsigned long used);
+};
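toi_bio_ops, toi_allocator_ops and toi_bio_allocator_ops are plain tables of function pointers: each backend (the swap or file allocator) fills one in and the core calls through it without knowing which backend it is talking to. A compilable userspace sketch of the ops-table idiom, using entirely hypothetical names:

    #include <stdio.h>

    struct storage_ops {
            unsigned long (*storage_available)(void);
            int (*allocate_storage)(unsigned long pages);
    };

    static unsigned long demo_available(void) { return 1024; }
    static int demo_allocate(unsigned long pages) { return pages <= 1024 ? 0 : -1; }

    /* A backend "registers" itself by exposing a filled-in ops table. */
    static struct storage_ops demo_ops = {
            .storage_available = demo_available,
            .allocate_storage = demo_allocate,
    };

    int main(void)
    {
            printf("available %lu, alloc 512 -> %d\n",
                   demo_ops.storage_available(), demo_ops.allocate_storage(512));
            return 0;
    }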
diff -Nur linux-4.2/kernel/power/tuxonice_bio_chains.c linux-4.2-pck/kernel/power/tuxonice_bio_chains.c
--- linux-4.2/kernel/power/tuxonice_bio_chains.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_bio_chains.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,1126 @@
+/*
+ * kernel/power/tuxonice_bio_chains.c
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/mm_types.h>
+#include "tuxonice_bio.h"
+#include "tuxonice_bio_internal.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+#include "tuxonice_io.h"
+
+static struct toi_bdev_info *prio_chain_head;
+static int num_chains;
+
+/* Pointer to current entry being loaded/saved. */
+struct toi_extent_iterate_state toi_writer_posn;
+
+#define metadata_size (sizeof(struct toi_bdev_info) - \
+                offsetof(struct toi_bdev_info, uuid))
+
+/*
+ * After section 0 (header) comes 2 => next_section[0] = 2
+ */
+static int next_section[3] = { 2, 3, 1 };
+
+/**
+ * dump_block_chains - print the contents of the bdev info array.
+ **/
+void dump_block_chains(void)
+{
+        int i = 0;
+        int j;
+        struct toi_bdev_info *cur_chain = prio_chain_head;
+
+        while (cur_chain) {
+                struct hibernate_extent *this = cur_chain->blocks.first;
+
+                printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio);
+
+                while (this) {
+                        printk(KERN_CONT " [%lu-%lu]%s", this->start,
+                                        this->end, this->next ? "," : "");
+                        this = this->next;
+                }
+
+                printk("\n");
+                cur_chain = cur_chain->next;
+                i++;
+        }
+
+        printk(KERN_DEBUG "Saved states:\n");
+        for (i = 0; i < 4; i++) {
+                printk(KERN_DEBUG "Slot %d: Chain %d.\n",
+                        i, toi_writer_posn.saved_chain_number[i]);
+
+                cur_chain = prio_chain_head;
+                j = 0;
+                while (cur_chain) {
+                        printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n",
+                                        j, cur_chain->saved_state[i].extent_num,
+                                        cur_chain->saved_state[i].offset);
+                        cur_chain = cur_chain->next;
+                        j++;
+                }
+                printk(KERN_CONT "\n");
+        }
+}
+
+/**
+ * toi_extent_chain_next - advance the current chain's position by one block
+ *
+ * Step the current chain's offset forward by one, moving on to the next
+ * extent (or clearing the current extent) when the end of the current
+ * extent is reached.
+ **/
+static void toi_extent_chain_next(void)
+{
+        struct toi_bdev_info *this = toi_writer_posn.current_chain;
+
+        if (!this->blocks.current_extent)
+                return;
+
+        if (this->blocks.current_offset == this->blocks.current_extent->end) {
+                if (this->blocks.current_extent->next) {
+                        this->blocks.current_extent =
+                                this->blocks.current_extent->next;
+                        this->blocks.current_offset =
+                                this->blocks.current_extent->start;
+                } else {
+                        this->blocks.current_extent = NULL;
+                        this->blocks.current_offset = 0;
+                }
+        } else
+                this->blocks.current_offset++;
+}
+
+/**
+ * __find_next_chain_same_prio - next usable chain of the current priority
+ *
+ * Rotate through the chains that share the current chain's priority,
+ * returning the next one that still has extents available, or the original
+ * chain if no other suitable chain is found.
+ */
+
+static struct toi_bdev_info *__find_next_chain_same_prio(void)
+{
+        struct toi_bdev_info *start_chain = toi_writer_posn.current_chain;
+        struct toi_bdev_info *this = start_chain;
+        int orig_prio = this->prio;
+
+        do {
+                this = this->next;
+
+                if (!this)
+                        this = prio_chain_head;
+
+                /* Back on original chain? Use it again. */
+                if (this == start_chain)
+                        return start_chain;
+
+        } while (!this->blocks.current_extent || this->prio != orig_prio);
+
+        return this;
+}
+
+static void find_next_chain(void)
+{
+        struct toi_bdev_info *this;
+
+        this = __find_next_chain_same_prio();
+
+        /*
+         * If we didn't get another chain of the same priority that we
+         * can use, look for the next priority.
+         */
+        while (this && !this->blocks.current_extent)
+                this = this->next;
+
+        toi_writer_posn.current_chain = this;
+}
+
+/**
+ * toi_extent_state_next - go to the next extent
+ * @blocks: The number of values to progress.
+ * @current_stream: The stream being read/written; the header stream (0) is
+ *                  not striped across chains.
+ *
+ * Given a state, progress to the next valid entry. We may begin in an
+ * invalid state, as we do when invoked after extent_state_goto_start below.
+ *
+ * When using compression and expected_compression > 0, we let the image size
+ * be larger than storage, so we can validly run out of data to return.
+ **/
+static unsigned long toi_extent_state_next(int blocks, int current_stream)
+{
+        int i;
+
+        if (!toi_writer_posn.current_chain)
+                return -ENOSPC;
+
+        /* Assume chains always have lengths that are multiples of @blocks */
+        for (i = 0; i < blocks; i++)
+                toi_extent_chain_next();
+
+        /* The header stream is not striped */
+        if (current_stream ||
+            !toi_writer_posn.current_chain->blocks.current_extent)
+                find_next_chain();
+
+        return  toi_writer_posn.current_chain ? 0 : -ENOSPC;
+}
+
+static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this)
+{
+        struct toi_bdev_info **prev_ptr;
+        struct toi_bdev_info *cur;
+
+        /* Loop through the existing chain, finding where to insert it */
+        prev_ptr = &prio_chain_head;
+        cur = prio_chain_head;
+
+        while (cur && cur->prio >= this->prio) {
+                prev_ptr = &cur->next;
+                cur = cur->next;
+        }
+
+        this->next = *prev_ptr;
+        *prev_ptr = this;
+
+        this = prio_chain_head;
+        while (this)
+                this = this->next;
+        num_chains++;
+}
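toi_insert_chain_in_prio_list() keeps the singly linked chain list sorted by descending priority by walking a pointer-to-pointer until it meets the first lower-priority entry and splicing the new chain in there. A self-contained userspace model of the same technique (the node type and priorities are made up):

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int prio; struct node *next; };

    static void insert_by_prio(struct node **head, struct node *new)
    {
            struct node **prev = head;

            while (*prev && (*prev)->prio >= new->prio)
                    prev = &(*prev)->next;

            new->next = *prev;
            *prev = new;
    }

    int main(void)
    {
            int prios[] = { 1, 5, 3 };
            struct node *head = NULL, *n;
            int i;

            for (i = 0; i < 3; i++) {
                    n = calloc(1, sizeof(*n));
                    n->prio = prios[i];
                    insert_by_prio(&head, n);
            }

            for (n = head; n; n = n->next)
                    printf("%d ", n->prio);   /* prints: 5 3 1 */
            printf("\n");
            return 0;
    }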
+
+/**
+ * toi_extent_state_goto_start - reinitialize the extent chain iterator
+ *
+ * Reset every chain's current extent and offset to its first block and
+ * make the highest-priority chain current.
+ **/
+void toi_extent_state_goto_start(void)
+{
+        struct toi_bdev_info *this = prio_chain_head;
+
+        while (this) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "Setting current extent to %p.", this->blocks.first);
+                this->blocks.current_extent = this->blocks.first;
+                if (this->blocks.current_extent) {
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        "Setting current offset to %lu.",
+                                        this->blocks.current_extent->start);
+                        this->blocks.current_offset =
+                                this->blocks.current_extent->start;
+                }
+
+                this = this->next;
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Setting current chain to %p.",
+                        prio_chain_head);
+        toi_writer_posn.current_chain = prio_chain_head;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Leaving extent state goto start.");
+}
+
+/**
+ * toi_extent_state_save - save state of the iterator
+ * @slot:        Which saved-state slot (0-3) to populate.
+ *
+ * Save the current position of each chain as an extent number and offset
+ * rather than as pointers, so the saved position can still be used with
+ * relocated chains (at resume time).
+ **/
+void toi_extent_state_save(int slot)
+{
+        struct toi_bdev_info *cur_chain = prio_chain_head;
+        struct hibernate_extent *extent;
+        struct hibernate_extent_saved_state *chain_state;
+        int i = 0;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.",
+                        slot);
+
+        if (!toi_writer_posn.current_chain) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current chain => "
+                                "chain_num = -1.");
+                toi_writer_posn.saved_chain_number[slot] = -1;
+                return;
+        }
+
+        while (cur_chain) {
+                i++;
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saving chain %d (%p) "
+                                "state, slot %d.", i, cur_chain, slot);
+
+                chain_state = &cur_chain->saved_state[slot];
+
+                chain_state->offset = cur_chain->blocks.current_offset;
+
+                if (toi_writer_posn.current_chain == cur_chain) {
+                        toi_writer_posn.saved_chain_number[slot] = i;
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0, "This is the chain "
+                                        "we were on => chain_num is %d.", i);
+                }
+
+                if (!cur_chain->blocks.current_extent) {
+                        chain_state->extent_num = 0;
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current extent "
+                                        "for this chain => extent_num %d is 0.",
+                                        i);
+                        cur_chain = cur_chain->next;
+                        continue;
+                }
+
+                extent = cur_chain->blocks.first;
+                chain_state->extent_num = 1;
+
+                while (extent != cur_chain->blocks.current_extent) {
+                        chain_state->extent_num++;
+                        extent = extent->next;
+                }
+
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "extent num %d is %d.", i,
+                                chain_state->extent_num);
+
+                cur_chain = cur_chain->next;
+        }
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "Completed saving extent state slot %d.", slot);
+}
+
+/**
+ * toi_extent_state_restore - restore the position saved by extent_state_save
+ * @slot:        Which saved-state slot (0-3) to restore from.
+ **/
+void toi_extent_state_restore(int slot)
+{
+        int i = 0;
+        struct toi_bdev_info *cur_chain = prio_chain_head;
+        struct hibernate_extent_saved_state *chain_state;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "toi_extent_state_restore - slot %d.", slot);
+
+        if (toi_writer_posn.saved_chain_number[slot] == -1) {
+                toi_writer_posn.current_chain = NULL;
+                return;
+        }
+
+        while (cur_chain) {
+                int posn;
+                int j;
+                i++;
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Restoring chain %d (%p) "
+                                "state, slot %d.", i, cur_chain, slot);
+
+                chain_state = &cur_chain->saved_state[slot];
+
+                posn = chain_state->extent_num;
+
+                cur_chain->blocks.current_extent = cur_chain->blocks.first;
+                cur_chain->blocks.current_offset = chain_state->offset;
+
+                if (i == toi_writer_posn.saved_chain_number[slot]) {
+                        toi_writer_posn.current_chain = cur_chain;
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        "Found current chain.");
+                }
+
+                for (j = 0; j < 4; j++)
+                        if (i == toi_writer_posn.saved_chain_number[j]) {
+                                toi_writer_posn.saved_chain_ptr[j] = cur_chain;
+                                toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        "Found saved chain ptr %d (%p) (offset"
+                                        " %lu).", j, cur_chain,
+                                        cur_chain->saved_state[j].offset);
+                        }
+
+                if (posn) {
+                        while (--posn)
+                                cur_chain->blocks.current_extent =
+                                        cur_chain->blocks.current_extent->next;
+                } else
+                        cur_chain->blocks.current_extent = NULL;
+
+                cur_chain = cur_chain->next;
+        }
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done.");
+        if (test_action_state(TOI_LOGALL))
+                dump_block_chains();
+}
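toi_extent_state_save() and toi_extent_state_restore() deliberately record a position as a 1-based extent number plus an offset rather than as pointers, so the position stays meaningful after the chains have been rebuilt at different addresses at resume time. A small userspace model of that idea (types and ranges invented for the example):

    #include <stdio.h>

    struct ext { unsigned long start, end; struct ext *next; };
    struct pos { int extent_num; unsigned long offset; };

    static struct pos save_pos(struct ext *head, struct ext *cur, unsigned long off)
    {
            struct pos p = { 1, off };

            while (head != cur) {
                    p.extent_num++;
                    head = head->next;
            }
            return p;
    }

    static struct ext *restore_pos(struct ext *head, struct pos p)
    {
            while (--p.extent_num)
                    head = head->next;
            return head;
    }

    int main(void)
    {
            struct ext c = { 20, 29, NULL }, b = { 10, 19, &c }, a = { 0, 9, &b };
            struct pos p = save_pos(&a, &b, 15);
            struct ext *back = restore_pos(&a, p);

            printf("extent %d, range %lu-%lu, offset %lu\n",
                   p.extent_num, back->start, back->end, p.offset);
            return 0;
    }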
+
+/*
+ * Storage needed
+ *
+ * Returns amount of space in the image header required
+ * for the chain data. This ignores the links between
+ * pages, which we factor in when allocating the space.
+ */
+int toi_bio_devinfo_storage_needed(void)
+{
+        int result = sizeof(num_chains);
+        struct toi_bdev_info *chain = prio_chain_head;
+
+        while (chain) {
+                result += metadata_size;
+
+                /* Chain size */
+                result += sizeof(int);
+
+                /* Extents */
+                result += (2 * sizeof(unsigned long) *
+                        chain->blocks.num_extents);
+
+                chain = chain->next;
+        }
+
+        result += 4 * sizeof(int);
+        return result;
+}
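As a hedged worked example of the sum in toi_bio_devinfo_storage_needed() (all concrete numbers are invented): with a metadata_size of 200 bytes and two chains holding 3 and 5 extents on a 64-bit build, the chain data needs 4 bytes for the chain count, 2 x (200 + 4) bytes of per-chain metadata plus extent counts, (3 + 5) x 2 x 8 bytes of start/end pairs, and 16 bytes of saved chain numbers: 556 bytes, before the page-link overhead the comment mentions. The same arithmetic as a compilable snippet:

    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical figures; metadata_size and the extent counts are made up. */
            unsigned long metadata_size = 200;
            int extents[] = { 3, 5 };                 /* two chains */
            unsigned long total = sizeof(int);        /* number of chains */
            int i;

            for (i = 0; i < 2; i++)
                    total += metadata_size + sizeof(int) +
                             2 * sizeof(unsigned long) * extents[i];

            total += 4 * sizeof(int);                 /* saved chain numbers */
            printf("%lu bytes of header space\n", total);   /* 556 on LP64 */
            return 0;
    }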
+
+static unsigned long chain_pages_used(struct toi_bdev_info *chain)
+{
+        struct hibernate_extent *this = chain->blocks.first;
+        struct hibernate_extent_saved_state *state = &chain->saved_state[3];
+        unsigned long size = 0;
+        int extent_idx = 1;
+
+        if (!state->extent_num) {
+                if (!this)
+                        return 0;
+                else
+                        return chain->blocks.size;
+        }
+
+        while (extent_idx < state->extent_num) {
+                size += (this->end - this->start + 1);
+                this = this->next;
+                extent_idx++;
+        }
+
+        /* We didn't use the one we're sitting on, so don't count it */
+        return size + state->offset - this->start;
+}
+
+void toi_bio_free_unused_storage_chain(struct toi_bdev_info *chain)
+{
+        unsigned long used = chain_pages_used(chain);
+
+        /* Free the storage */
+        unsigned long first_freed = 0;
+
+        if (chain->allocator->bio_allocator_ops->free_unused_storage)
+                first_freed = chain->allocator->bio_allocator_ops->free_unused_storage(chain, used);
+
+        printk(KERN_EMERG "Used %ld blocks in this chain. First extent freed is %lx.\n",
+                        used, first_freed);
+
+        /* Adjust / free the extents. */
+        toi_put_extent_chain_from(&chain->blocks, first_freed);
+
+        {
+                struct hibernate_extent *this = chain->blocks.first;
+
+                while (this) {
+                        printk("Extent %lx-%lx.\n", this->start, this->end);
+                        this = this->next;
+                }
+        }
+}
+
+/**
+ * toi_serialise_extent_chain - write a chain in the image
+ * @chain:        Chain to write.
+ **/
+static int toi_serialise_extent_chain(struct toi_bdev_info *chain)
+{
+        struct hibernate_extent *this;
+        int ret;
+        int i = 1;
+
+        chain->pages_used = chain_pages_used(chain);
+
+        if (test_action_state(TOI_LOGALL))
+                dump_block_chains();
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).",
+                        chain->dev_t);
+        /* Device info -  dev_t, prio, bmap_shift, blocks per page, positions */
+        ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+                        (char *) &chain->uuid, metadata_size);
+        if (ret)
+                return ret;
+
+        /* Num extents */
+        ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+                        (char *) &chain->blocks.num_extents, sizeof(int));
+        if (ret)
+                return ret;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+                        chain->blocks.num_extents);
+
+        this = chain->blocks.first;
+        while (this) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i);
+                ret = toiActiveAllocator->rw_header_chunk(WRITE,
+                                &toi_blockwriter_ops,
+                                (char *) this, 2 * sizeof(this->start));
+                if (ret)
+                        return ret;
+                this = this->next;
+                i++;
+        }
+
+        return ret;
+}
+
+int toi_serialise_extent_chains(void)
+{
+        struct toi_bdev_info *this = prio_chain_head;
+        int result;
+
+        /* Write the number of chains */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Write number of chains (%d)",
+                        num_chains);
+        result = toiActiveAllocator->rw_header_chunk(WRITE,
+                        &toi_blockwriter_ops, (char *) &num_chains,
+                        sizeof(int));
+        if (result)
+                return result;
+
+        /* Then the chains themselves */
+        while (this) {
+                result = toi_serialise_extent_chain(this);
+                if (result)
+                        return result;
+                this = this->next;
+        }
+
+        /*
+         * Finally, the chain we should be on at the start of each
+         * section.
+         */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saved chain numbers.");
+        result = toiActiveAllocator->rw_header_chunk(WRITE,
+                        &toi_blockwriter_ops,
+                        (char *) &toi_writer_posn.saved_chain_number[0],
+                        4 * sizeof(int));
+
+        return result;
+}
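Together, toi_serialise_extent_chains() and toi_serialise_extent_chain() emit a simple count-prefixed record stream: the number of chains, then for each chain its metadata block, its extent count and its start/end pairs, and finally the four saved chain numbers. A userspace sketch of the same length-prefixed style of serialisation for a flat array of ranges (all names are illustrative):

    #include <stdio.h>
    #include <string.h>

    struct range { unsigned long start, end; };

    /* Write the count first, then the records themselves. */
    static size_t serialise(char *buf, const struct range *r, int count)
    {
            size_t pos = 0;

            memcpy(buf + pos, &count, sizeof(count));
            pos += sizeof(count);
            memcpy(buf + pos, r, count * sizeof(*r));
            pos += count * sizeof(*r);
            return pos;
    }

    int main(void)
    {
            struct range extents[] = { { 10, 19 }, { 40, 49 } };
            char buf[128];
            size_t len = serialise(buf, extents, 2);
            int count;

            memcpy(&count, buf, sizeof(count));
            printf("%zu bytes written, %d records\n", len, count);
            return 0;
    }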
+
+int toi_register_storage_chain(struct toi_bdev_info *new)
+{
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Inserting chain %p into list.",
+                        new);
+        toi_insert_chain_in_prio_list(new);
+        return 0;
+}
+
+static void free_bdev_info(struct toi_bdev_info *chain)
+{
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Free chain %p.", chain);
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Block extents.");
+        toi_put_extent_chain(&chain->blocks);
+
+        /*
+         * The allocator may need to do more than just free the chains
+         * (swap_free, for example). Don't call from boot kernel.
+         */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Allocator extents.");
+        if (chain->allocator)
+                chain->allocator->bio_allocator_ops->free_storage(chain);
+
+        /*
+         * Dropping out of reading atomic copy? Need to undo
+         * toi_open_by_devnum.
+         */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Bdev.");
+        if (chain->bdev && !IS_ERR(chain->bdev) &&
+                        chain->bdev != resume_block_device &&
+                        chain->bdev != header_block_device &&
+                        test_toi_state(TOI_TRYING_TO_RESUME))
+                toi_close_bdev(chain->bdev);
+
+        /* Poison */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Struct.");
+        toi_kfree(39, chain, sizeof(*chain));
+
+        if (prio_chain_head == chain)
+                prio_chain_head = NULL;
+
+        num_chains--;
+}
+
+void free_all_bdev_info(void)
+{
+        struct toi_bdev_info *this = prio_chain_head;
+
+        while (this) {
+                struct toi_bdev_info *next = this->next;
+                free_bdev_info(this);
+                this = next;
+        }
+
+        memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn));
+        prio_chain_head = NULL;
+}
+
+static void set_up_start_position(void)
+{
+        toi_writer_posn.current_chain = prio_chain_head;
+        go_next_page(0, 0);
+}
+
+/**
+ * toi_load_extent_chain - read back a chain saved in the image
+ * @chain:        Chain to load
+ *
+ * The linked list of extents is reconstructed from the disk. chain will point
+ * to the first entry.
+ **/
+int toi_load_extent_chain(int index, int *num_loaded)
+{
+        struct toi_bdev_info *chain = toi_kzalloc(39,
+                        sizeof(struct toi_bdev_info), GFP_ATOMIC);
+        struct hibernate_extent *this, *last = NULL;
+        int i, ret;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Loading extent chain %d.", index);
+        /* Get dev_t, prio, bmap_shift, blocks per page, positions */
+        ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+                        (char *) &chain->uuid, metadata_size);
+
+        if (ret) {
+                printk(KERN_ERR "Failed to read extent chain device info.\n");
+                toi_kfree(39, chain, sizeof(*chain));
+                return 1;
+        }
+
+        toi_bkd.pages_used[index] = chain->pages_used;
+
+        ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+                        (char *) &chain->blocks.num_extents, sizeof(int));
+        if (ret) {
+                printk(KERN_ERR "Failed to read the size of extent chain.\n");
+                toi_kfree(39, chain, sizeof(*chain));
+                return 1;
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+                        chain->blocks.num_extents);
+
+        for (i = 0; i < chain->blocks.num_extents; i++) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i + 1);
+
+                this = toi_kzalloc(2, sizeof(struct hibernate_extent),
+                                TOI_ATOMIC_GFP);
+                if (!this) {
+                        printk(KERN_INFO "Failed to allocate a new extent.\n");
+                        free_bdev_info(chain);
+                        return -ENOMEM;
+                }
+                this->next = NULL;
+                /* Get the next page */
+                ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+                                NULL, (char *) this, 2 * sizeof(this->start));
+                if (ret) {
+                        printk(KERN_INFO "Failed to read an extent.\n");
+                        toi_kfree(2, this, sizeof(struct hibernate_extent));
+                        free_bdev_info(chain);
+                        return 1;
+                }
+
+                if (last)
+                        last->next = this;
+                else {
+                        char b1[32], b2[32], b3[32];
+                        /*
+                         * Open the bdev
+                         */
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                "Chain dev_t is %s. Resume dev_t is %s. Header"
+                                " dev_t is %s.\n",
+                                format_dev_t(b1, chain->dev_t),
+                                format_dev_t(b2, resume_dev_t),
+                                format_dev_t(b3, toi_sig_data->header_dev_t));
+
+                        if (chain->dev_t == resume_dev_t)
+                                chain->bdev = resume_block_device;
+                        else if (chain->dev_t == toi_sig_data->header_dev_t)
+                                chain->bdev = header_block_device;
+                        else {
+                                chain->bdev = toi_open_bdev(chain->uuid,
+                                                chain->dev_t, 1);
+                                if (IS_ERR(chain->bdev)) {
+                                        free_bdev_info(chain);
+                                        return -ENODEV;
+                                }
+                        }
+
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Chain bmap shift "
+                                        "is %d and blocks per page is %d.",
+                                        chain->bmap_shift,
+                                        chain->blocks_per_page);
+
+                        chain->blocks.first = this;
+
+                        /*
+                         * Couldn't do this earlier, but can't do
+                         * goto_start now - we may have already used blocks
+                         * in the first chain.
+                         */
+                        chain->blocks.current_extent = this;
+                        chain->blocks.current_offset = this->start;
+
+                        /*
+                         * Can't wait until we've read the whole chain
+                         * before we insert it in the list. We might need
+                         * this chain to read the next page in the header
+                         */
+                        toi_insert_chain_in_prio_list(chain);
+                }
+
+                /*
+                 * We have to wait until 2 extents are loaded before setting up
+                 * properly because if the first extent has only one page, we
+                 * will need to put the position on the second extent. Sounds
+                 * obvious, but it wasn't!
+                 */
+                (*num_loaded)++;
+                if ((*num_loaded) == 2)
+                        set_up_start_position();
+                last = this;
+        }
+
+        /*
+         * Shouldn't get empty chains, but it's not impossible. Link them in so
+         * they get freed properly later.
+         */
+        if (!chain->blocks.num_extents)
+                toi_insert_chain_in_prio_list(chain);
+
+        if (!chain->blocks.current_extent) {
+                chain->blocks.current_extent = chain->blocks.first;
+                if (chain->blocks.current_extent)
+                        chain->blocks.current_offset =
+                                chain->blocks.current_extent->start;
+        }
+        return 0;
+}
+
+int toi_load_extent_chains(void)
+{
+        int result;
+        int to_load;
+        int i;
+        int extents_loaded = 0;
+
+        result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+                        (char *) &to_load,
+                        sizeof(int));
+        if (result)
+                return result;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d chains to read.", to_load);
+
+        for (i = 0; i < to_load; i++) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.",
+                                i, to_load);
+                result = toi_load_extent_chain(i, &extents_loaded);
+                if (result)
+                        return result;
+        }
+
+        /* If we never got to a second extent, we still need to do this. */
+        if (extents_loaded == 1)
+                set_up_start_position();
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Reading saved chain numbers.");
+        result = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+                        &toi_blockwriter_ops,
+                        (char *) &toi_writer_posn.saved_chain_number[0],
+                        4 * sizeof(int));
+
+        return result;
+}
+
+static int toi_end_of_stream(int writing, int section_barrier)
+{
+        struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+        int compare_to = next_section[current_stream];
+        struct toi_bdev_info *compare_chain =
+                toi_writer_posn.saved_chain_ptr[compare_to];
+        int compare_offset = compare_chain ?
+                compare_chain->saved_state[compare_to].offset : 0;
+
+        if (!section_barrier)
+                return 0;
+
+        if (!cur_chain)
+                return 1;
+
+        if (cur_chain == compare_chain &&
+            cur_chain->blocks.current_offset == compare_offset) {
+                if (writing) {
+                        if (!current_stream) {
+                                debug_broken_header();
+                                return 1;
+                        }
+                } else {
+                        more_readahead = 0;
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        "Reached the end of stream %d "
+                                        "(not an error).", current_stream);
+                        return 1;
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * go_next_page - skip blocks to the start of the next page
+ * @writing: Whether we're reading or writing the image.
+ * @section_barrier: Whether to stop at the end of the current section.
+ *
+ * Go forward one page.
+ **/
+int go_next_page(int writing, int section_barrier)
+{
+        struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+        int max = cur_chain ? cur_chain->blocks_per_page : 1;
+
+        /* Go forward a page - or maybe two. Don't stripe the header,
+         * so that bad fragmentation doesn't put the extent data containing
+         * the location of the second page outside the first header page.
+         */
+        if (toi_extent_state_next(max, current_stream)) {
+                /* Don't complain if readahead falls off the end */
+                if (writing && section_barrier) {
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent state eof. "
+                                "Expected compression ratio too optimistic?");
+                        if (test_action_state(TOI_LOGALL))
+                                dump_block_chains();
+                }
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Ran out of extents to "
+                                "read/write. (Not necessarily a fatal error.)");
+                return -ENOSPC;
+        }
+
+        return 0;
+}
+
+int devices_of_same_priority(struct toi_bdev_info *this)
+{
+        struct toi_bdev_info *check = prio_chain_head;
+        int i = 0;
+
+        while (check) {
+                if (check->prio == this->prio)
+                        i++;
+                check = check->next;
+        }
+
+        return i;
+}
+
+/**
+ * toi_bio_rw_page - do i/o on the next disk page in the image
+ * @writing: Whether reading or writing.
+ * @page: Page to do i/o on.
+ * @is_readahead: Whether we're doing readahead
+ * @free_group: The group used in allocating the page
+ *
+ * Submit a page for reading or writing, possibly readahead.
+ * Pass the group used in allocating the page as well, as it should
+ * be freed on completion of the bio if we're writing the page.
+ **/
+int toi_bio_rw_page(int writing, struct page *page,
+                int is_readahead, int free_group)
+{
+        int result = toi_end_of_stream(writing, 1);
+        struct toi_bdev_info *dev_info = toi_writer_posn.current_chain;
+
+        if (result) {
+                if (writing)
+                        abort_hibernate(TOI_INSUFFICIENT_STORAGE,
+                                "Insufficient storage for your image.");
+                else
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking to "
+                                "read/write another page when stream has "
+                                "ended.");
+                return -ENOSPC;
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "%s %lx:%ld",
+                        writing ? "Write" : "Read",
+                        dev_info->dev_t, dev_info->blocks.current_offset);
+
+        result = toi_do_io(writing, dev_info->bdev,
+                dev_info->blocks.current_offset << dev_info->bmap_shift,
+                page, is_readahead, 0, free_group);
+
+        /* Ignore the result here - we'll check for end of stream if we come in again */
+        go_next_page(writing, 1);
+
+        if (result)
+                printk(KERN_ERR "toi_do_io returned %d.\n", result);
+        return result;
+}
+
+dev_t get_header_dev_t(void)
+{
+        return prio_chain_head->dev_t;
+}
+
+struct block_device *get_header_bdev(void)
+{
+        return prio_chain_head->bdev;
+}
+
+unsigned long get_headerblock(void)
+{
+        return prio_chain_head->blocks.first->start <<
+                prio_chain_head->bmap_shift;
+}
+
+int get_main_pool_phys_params(void)
+{
+        struct toi_bdev_info *this = prio_chain_head;
+        int result;
+
+        while (this) {
+                result = this->allocator->bio_allocator_ops->bmap(this);
+                if (result)
+                        return result;
+                this = this->next;
+        }
+
+        return 0;
+}
+
+static int apply_header_reservation(void)
+{
+        int i;
+
+        if (!header_pages_reserved) {
+                toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                "No header pages reserved at the moment.");
+                return 0;
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Applying header reservation.");
+
+        /* Apply header space reservation */
+        toi_extent_state_goto_start();
+
+        for (i = 0; i < header_pages_reserved; i++)
+                if (go_next_page(1, 0))
+                        return -ENOSPC;
+
+        /* The end of header pages will be the start of pageset 2 */
+        toi_extent_state_save(2);
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "Finished applying header reservation.");
+        return 0;
+}
+
+static int toi_bio_register_storage(void)
+{
+        int result = 0;
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    this_module->type != BIO_ALLOCATOR_MODULE)
+                        continue;
+                toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                "Registering storage from %s.",
+                                this_module->name);
+                result = this_module->bio_allocator_ops->register_storage();
+                if (result)
+                        break;
+        }
+
+        return result;
+}
+
+void toi_bio_free_unused_storage(void)
+{
+        struct toi_bdev_info *this = prio_chain_head;
+
+        while (this) {
+                toi_bio_free_unused_storage_chain(this);
+                this = this->next;
+        }
+}
+
+int toi_bio_allocate_storage(unsigned long request)
+{
+        struct toi_bdev_info *chain = prio_chain_head;
+        unsigned long to_get = request;
+        unsigned long extra_pages, needed;
+        int no_free = 0;
+
+        if (!chain) {
+                int result = toi_bio_register_storage();
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+                        "Registering storage.");
+                if (result)
+                        return 0;
+                chain = prio_chain_head;
+                if (!chain) {
+                        printk("TuxOnIce: No storage was registered.\n");
+                        return 0;
+                }
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+                        "Request is %lu pages.", request);
+        extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
+                               + sizeof(int)), PAGE_SIZE);
+        needed = request + extra_pages + header_pages_reserved;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu "
+                        "for header => %lu.",
+                        extra_pages, header_pages_reserved, needed);
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Already allocated %lu pages.",
+                        raw_pages_allocd);
+
+        to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get);
+
+        if (!to_get)
+                return apply_header_reservation();
+
+        while (to_get && chain) {
+                int num_group = devices_of_same_priority(chain);
+                int divisor = num_group - no_free;
+                int i;
+                unsigned long portion = DIV_ROUND_UP(to_get, divisor);
+                unsigned long got = 0;
+                unsigned long got_this_round = 0;
+                struct toi_bdev_info *top = chain;
+
+                toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                " Start of loop. To get is %lu. Divisor is %d.",
+                                to_get, divisor);
+                no_free = 0;
+
+                /*
+                 * We're aiming to spread the allocated storage as evenly
+                 * as possible, but we also want to get all the storage we
+                 * can off this priority.
+                 */
+                for (i = 0; i < num_group; i++) {
+                        struct toi_bio_allocator_ops *ops =
+                                chain->allocator->bio_allocator_ops;
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        " Asking for %lu pages from chain %p.",
+                                        portion, chain);
+                        got = ops->allocate_storage(chain, portion);
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        " Got %lu pages from allocator %p.",
+                                        got, chain);
+                        if (!got)
+                                no_free++;
+                        got_this_round += got;
+                        chain = chain->next;
+                }
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, " Loop finished. Got a "
+                                "total of %lu pages from %d allocators.",
+                                got_this_round, divisor - no_free);
+
+                raw_pages_allocd += got_this_round;
+                to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd :
+                        0;
+
+                /*
+                 * If we got anything from chains of this priority and we
+                 * still have storage to allocate, go over this priority
+                 * again.
+                 */
+                if (got_this_round && to_get)
+                        chain = top;
+                else
+                        no_free = 0;
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Finished allocating. Calling "
+                        "get_main_pool_phys_params");
+        /* Now let swap allocator bmap the pages */
+        get_main_pool_phys_params();
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done. Reserving header.");
+        return apply_header_reservation();
+}
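The allocation loop above spreads a request over all devices of the current priority, asking each for roughly DIV_ROUND_UP(remaining, usable devices) pages and going around again while any progress is made, moving on to the next priority only when this one is exhausted. A hedged worked example with invented numbers: a 100-page request over three equal-priority devices asks each for DIV_ROUND_UP(100, 3) = 34 pages; if one device can only give 20, the shortfall is simply re-split on the next pass. A simplified userspace sketch of that top-up loop (unlike the real code, it does not shrink the divisor as devices run dry):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            /* Hypothetical capacities for three same-priority devices. */
            unsigned long capacity[3] = { 20, 200, 200 };
            unsigned long got[3] = { 0, 0, 0 };
            unsigned long to_get = 100;
            int i;

            while (to_get) {
                    unsigned long portion = DIV_ROUND_UP(to_get, 3);
                    unsigned long round = 0;

                    for (i = 0; i < 3; i++) {
                            unsigned long take = portion;

                            if (take > capacity[i] - got[i])
                                    take = capacity[i] - got[i];
                            got[i] += take;
                            round += take;
                    }
                    if (!round)
                            break;        /* every device is full */
                    to_get -= round;
            }

            printf("%lu %lu %lu\n", got[0], got[1], got[2]);   /* 20 40 40 */
            return 0;
    }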
+
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd)
+{
+        int i = 0;
+        struct toi_bdev_info *cur_chain = prio_chain_head;
+
+        while (cur_chain) {
+                cur_chain->pages_used = bkd->pages_used[i];
+                cur_chain = cur_chain->next;
+                i++;
+        }
+}
+
+int toi_bio_chains_debug_info(char *buffer, int size)
+{
+        /* Show what we actually used */
+        struct toi_bdev_info *cur_chain = prio_chain_head;
+        int len = 0;
+
+        while (cur_chain) {
+                len += scnprintf(buffer + len, size - len, "  Used %lu pages "
+                                "from %s.\n", cur_chain->pages_used,
+                                cur_chain->name);
+                cur_chain = cur_chain->next;
+        }
+
+        return len;
+}
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+        struct toi_bdev_info *this = toi_writer_posn.current_chain,
+                             *cmp = prio_chain_head;
+
+        ptr->save.chain = 1;
+        while (this != cmp) {
+                ptr->save.chain++;
+                cmp = cmp->next;
+        }
+        ptr->save.block = this->blocks.current_offset;
+
+        /* Save the raw info internally for quicker access when updating pointers */
+        ptr->bdev = this->bdev;
+        ptr->block = this->blocks.current_offset << this->bmap_shift;
+}
+
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+        int i = ptr->save.chain - 1;
+        struct toi_bdev_info *this;
+        struct hibernate_extent *hib;
+
+        /* Find chain by stored index */
+        this = prio_chain_head;
+        while (i) {
+                this = this->next;
+                i--;
+        }
+        toi_writer_posn.current_chain = this;
+
+        /* Restore block */
+        this->blocks.current_offset = ptr->save.block;
+
+        /* Find the current extent from the block number */
+        hib = this->blocks.first;
+
+        while (hib->start > ptr->save.block)
+                hib = hib->next;
+
+        this->blocks.last_touched = this->blocks.current_extent = hib;
+}
diff -Nur linux-4.2/kernel/power/tuxonice_bio_core.c linux-4.2-pck/kernel/power/tuxonice_bio_core.c
--- linux-4.2/kernel/power/tuxonice_bio_core.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_bio_core.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,1933 @@
+/*
+ * kernel/power/tuxonice_bio_core.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains block io functions for TuxOnIce. These are
+ * used by the swapwriter and it is planned that they will also
+ * be used by the NFSwriter.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/syscalls.h>
+#include <linux/suspend.h>
+#include <linux/ctype.h>
+#include <linux/mount.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+#define MEMORY_ONLY 1
+#define THROTTLE_WAIT 2
+
+/* #define MEASURE_MUTEX_CONTENTION */
+#ifndef MEASURE_MUTEX_CONTENTION
+#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
+#else
+unsigned long mutex_times[2][2][NR_CPUS];
+#define my_mutex_lock(index, the_lock) do { \
+        int have_mutex; \
+        have_mutex = mutex_trylock(the_lock); \
+        if (!have_mutex) { \
+                mutex_lock(the_lock); \
+                mutex_times[index][0][smp_processor_id()]++; \
+        } else { \
+                mutex_times[index][1][smp_processor_id()]++; \
+        }
+
+#define my_mutex_unlock(index, the_lock) \
+        mutex_unlock(the_lock); \
+} while (0)
+#endif
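When MEASURE_MUTEX_CONTENTION is defined, the macros above count per CPU how often each lock was taken contended (the trylock failed) versus uncontended. The same measurement idea as a self-contained userspace sketch using pthreads (the counters and names are the example's own, not the patch's):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long contended, uncontended;

    static void counted_lock(void)
    {
            if (pthread_mutex_trylock(&lock) == 0)
                    uncontended++;        /* got it without waiting */
            else {
                    pthread_mutex_lock(&lock);
                    contended++;          /* had to block */
            }
    }

    int main(void)
    {
            counted_lock();
            pthread_mutex_unlock(&lock);
            printf("contended=%lu uncontended=%lu\n", contended, uncontended);
            return 0;
    }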
+
+static int page_idx, reset_idx;
+
+static int target_outstanding_io = 1024;
+static int max_outstanding_writes, max_outstanding_reads;
+
+static struct page *bio_queue_head, *bio_queue_tail;
+static atomic_t toi_bio_queue_size;
+static DEFINE_SPINLOCK(bio_queue_lock);
+
+static int free_mem_throttle, throughput_throttle;
+int more_readahead = 1;
+static struct page *readahead_list_head, *readahead_list_tail;
+
+static struct page *waiting_on;
+
+static atomic_t toi_io_in_progress, toi_io_done;
+static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
+
+int current_stream;
+/* Not static, so that the allocators can setup and complete
+ * writing the header */
+char *toi_writer_buffer;
+int toi_writer_buffer_posn;
+
+static DEFINE_MUTEX(toi_bio_mutex);
+static DEFINE_MUTEX(toi_bio_readahead_mutex);
+
+static struct task_struct *toi_queue_flusher;
+static int toi_bio_queue_flush_pages(int dedicated_thread);
+
+struct toi_module_ops toi_blockwriter_ops;
+
+struct toi_incremental_image_pointer toi_inc_ptr[2][2];
+
+#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
+               atomic_read(&toi_bio_queue_size))
+
+unsigned long raw_pages_allocd, header_pages_reserved;
+
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+                int no_readahead);
+
+/**
+ * set_free_mem_throttle - set the point where we pause to avoid oom.
+ *
+ * Initially, this value is zero, but when we first fail to allocate memory,
+ * we set it (plus a buffer) and thereafter throttle i/o once that limit is
+ * reached.
+ **/
+static void set_free_mem_throttle(void)
+{
+        int new_throttle = nr_free_buffer_pages() + 256;
+
+        if (new_throttle > free_mem_throttle)
+                free_mem_throttle = new_throttle;
+}
+
+#define NUM_REASONS 7
+static atomic_t reasons[NUM_REASONS];
+static char *reason_name[NUM_REASONS] = {
+        "readahead not ready",
+        "bio allocation",
+        "synchronous I/O",
+        "toi_bio_get_new_page",
+        "memory low",
+        "readahead buffer allocation",
+        "throughput_throttle",
+};
+
+/* User Specified Parameters. */
+unsigned long resume_firstblock;
+dev_t resume_dev_t;
+struct block_device *resume_block_device;
+static atomic_t resume_bdev_open_count;
+
+struct block_device *header_block_device;
+
+/**
+ * toi_open_bdev: Open a bdev at resume time.
+ *
+ * uuid: The 16-byte filesystem UUID to look for. May be NULL, in which case
+ * we rely on default_device alone.
+ * default_device: The dev_t (major/minor) to fall back to if the UUID is NULL
+ * or can't be resolved (the user can have resume= pointing at a swap
+ * partition/file that isn't swapon'd when they hibernate).
+ * display_errs: Whether to report errors (0 means try to do this quietly).
+ *
+ * We stored a dev_t in the image header. Open the matching device without
+ * requiring /dev/<whatever> in most cases and record the details needed
+ * to close it later and avoid duplicating work.
+ */
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+                int display_errs)
+{
+        struct block_device *bdev;
+        dev_t device = default_device;
+        char buf[32];
+        int retried = 0;
+
+retry:
+        if (uuid) {
+                struct fs_info seek;
+                strncpy((char *) &seek.uuid, uuid, 16);
+                seek.dev_t = 0;
+                seek.last_mount_size = 0;
+                device = blk_lookup_fs_info(&seek);
+                if (!device) {
+                        device = default_device;
+                        printk(KERN_DEBUG "Unable to resolve uuid. Falling back"
+                                        " to dev_t.\n");
+                } else
+                        printk(KERN_DEBUG "Resolved uuid to device %s.\n",
+                                        format_dev_t(buf, device));
+        }
+
+        if (!device) {
+                printk(KERN_ERR "TuxOnIce attempting to open a "
+                                "blank dev_t!\n");
+                dump_stack();
+                return NULL;
+        }
+        bdev = toi_open_by_devnum(device);
+
+        if (IS_ERR(bdev) || !bdev) {
+                if (!retried) {
+                        retried = 1;
+                        wait_for_device_probe();
+                        goto retry;
+                }
+                if (display_errs)
+                        toi_early_boot_message(1, TOI_CONTINUE_REQ,
+                                "Failed to get access to block device "
+                                "\"%x\" (error %ld).\n Maybe you need "
+                                "to run mknod and/or lvmsetup in an "
+                                "initrd/ramfs?", device, PTR_ERR(bdev));
+                return ERR_PTR(-EINVAL);
+        }
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                        "TuxOnIce got bdev %p for dev_t %x.",
+                        bdev, device);
+
+        return bdev;
+}
+
+static void toi_bio_reserve_header_space(unsigned long request)
+{
+        header_pages_reserved = request;
+}
+
+/**
+ * do_bio_wait - wait for some TuxOnIce I/O to complete
+ * @reason: The array index of the reason we're waiting.
+ *
+ * Wait for a particular page of I/O if we're after a particular page.
+ * If we're not after a particular page, wait instead for all in flight
+ * I/O to be completed or for us to have enough free memory to be able
+ * to submit more I/O.
+ *
+ * If we wait, we also update our statistics regarding why we waited.
+ **/
+static void do_bio_wait(int reason)
+{
+        struct page *was_waiting_on = waiting_on;
+
+        /* On SMP, waiting_on can be reset, so we make a copy */
+        if (was_waiting_on) {
+                wait_on_page_locked(was_waiting_on);
+                atomic_inc(&reasons[reason]);
+        } else {
+                atomic_inc(&reasons[reason]);
+
+                wait_event(num_in_progress_wait,
+                        !atomic_read(&toi_io_in_progress) ||
+                        nr_free_buffer_pages() > free_mem_throttle);
+        }
+}
+
+/**
+ * throttle_if_needed - wait for I/O completion if throttle points are reached
+ * @flags: What to check and how to act.
+ *
+ * Check whether we need to wait for some I/O to complete. We always check
+ * whether we have enough memory available, but may also (depending upon
+ * @flags) check if the throughput throttle limit has been reached.
+ **/
+static int throttle_if_needed(int flags)
+{
+        int free_pages = nr_free_buffer_pages();
+
+        /* Getting low on memory and I/O is in progress? */
+        while (unlikely(free_pages < free_mem_throttle) &&
+                        atomic_read(&toi_io_in_progress) &&
+                        !test_result_state(TOI_ABORTED)) {
+                if (!(flags & THROTTLE_WAIT))
+                        return -ENOMEM;
+                do_bio_wait(4);
+                free_pages = nr_free_buffer_pages();
+        }
+
+        while (!(flags & MEMORY_ONLY) && throughput_throttle &&
+                TOTAL_OUTSTANDING_IO >= throughput_throttle &&
+                !test_result_state(TOI_ABORTED)) {
+                int result = toi_bio_queue_flush_pages(0);
+                if (result)
+                        return result;
+                atomic_inc(&reasons[6]);
+                wait_event(num_in_progress_wait,
+                        !atomic_read(&toi_io_in_progress) ||
+                        TOTAL_OUTSTANDING_IO < throughput_throttle);
+        }
+
+        return 0;
+}
+
+/**
+ * update_throughput_throttle - update the raw throughput throttle
+ * @jif_index: The number of times this function has been called.
+ *
+ * This function is called four times per second by the core, and used to limit
+ * the amount of I/O we submit at once, spreading out our waiting through the
+ * whole job and letting userui get an opportunity to do its work.
+ *
+ * We don't start limiting I/O until 1/4s has gone so that we get a
+ * decent sample for our initial limit, and keep updating it because
+ * throughput may vary (e.g. on rotating media) with our block number.
+ *
+ * We throttle to 1/10s worth of I/O.
+ **/
+static void update_throughput_throttle(int jif_index)
+{
+        int done = atomic_read(&toi_io_done);
+        throughput_throttle = done * 2 / 5 / jif_index;
+}
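+
+/*
+ * A worked example of the arithmetic above (figures purely illustrative):
+ * after two seconds jif_index is 8; if 4000 pages of I/O have completed by
+ * then, the average rate is 2000 pages/s, so a tenth of a second's worth is
+ * 4000 * 2 / 5 / 8 = 200 pages.
+ */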
+
+/**
+ * toi_finish_all_io - wait for all outstanding i/o to complete
+ *
+ * Flush any queued but unsubmitted I/O and wait for it all to complete.
+ **/
+static int toi_finish_all_io(void)
+{
+        int result = toi_bio_queue_flush_pages(0);
+        toi_bio_queue_flusher_should_finish = 1;
+        wake_up(&toi_io_queue_flusher);
+        wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
+        return result;
+}
+
+/**
+ * toi_end_bio - bio completion function.
+ * @bio: bio that has completed.
+ * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
+ *
+ * Function called by the block driver from interrupt context when I/O is
+ * completed. If we were writing the page, we want to free it and will have
+ * set bio->bi_private to the parameter we should use in telling the page
+ * allocation accounting code what the page was allocated for. If we're
+ * reading the page, it will be in the singly linked list made from
+ * page->private pointers.
+ **/
+static void toi_end_bio(struct bio *bio, int err)
+{
+        struct page *page = bio->bi_io_vec[0].bv_page;
+
+        BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
+
+        unlock_page(page);
+        bio_put(bio);
+
+        if (waiting_on == page)
+                waiting_on = NULL;
+
+        put_page(page);
+
+        if (bio->bi_private)
+                toi__free_page((int) ((unsigned long) bio->bi_private), page);
+
+        bio_put(bio);
+
+        atomic_dec(&toi_io_in_progress);
+        atomic_inc(&toi_io_done);
+
+        wake_up(&num_in_progress_wait);
+}
+
+/**
+ * submit - submit BIO request
+ * @writing: READ or WRITE.
+ * @dev: The block device we're using.
+ * @first_block: The first sector we're using.
+ * @page: The page being used for I/O.
+ * @free_group: If writing, the group that was used in allocating the page
+ *         and which will be used in freeing the page from the completion
+ *         routine.
+ *
+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
+ * textbook - allocate and initialize the bio. If we're writing, make sure
+ * the page is marked as dirty. Then submit it and carry on."
+ *
+ * If we're just testing the speed of our own code, we fake having done all
+ * the hard work and call toi_end_bio immediately.
+ **/
+static int submit(int writing, struct block_device *dev, sector_t first_block,
+                struct page *page, int free_group)
+{
+        struct bio *bio = NULL;
+        int cur_outstanding_io, result;
+
+        /*
+         * Shouldn't throttle if reading - can deadlock in the single
+         * threaded case as pages are only freed when we use the
+         * readahead.
+         */
+        if (writing) {
+                result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
+                if (result)
+                        return result;
+        }
+
+        while (!bio) {
+                bio = bio_alloc(TOI_ATOMIC_GFP, 1);
+                if (!bio) {
+                        set_free_mem_throttle();
+                        do_bio_wait(1);
+                }
+        }
+
+        bio->bi_bdev = dev;
+        bio->bi_iter.bi_sector = first_block;
+        bio->bi_private = (void *) ((unsigned long) free_group);
+        bio->bi_end_io = toi_end_bio;
+        bio->bi_flags |= (1 << BIO_TOI);
+
+        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+                printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n",
+                                (unsigned long long) first_block);
+                bio_put(bio);
+                return -EFAULT;
+        }
+
+        bio_get(bio);
+
+        cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
+        if (writing) {
+                if (cur_outstanding_io > max_outstanding_writes)
+                        max_outstanding_writes = cur_outstanding_io;
+        } else {
+                if (cur_outstanding_io > max_outstanding_reads)
+                        max_outstanding_reads = cur_outstanding_io;
+        }
+
+        /* Still read the header! */
+        if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) {
+                /* Fake having done the hard work */
+                set_bit(BIO_UPTODATE, &bio->bi_flags);
+                toi_end_bio(bio, 0);
+        } else
+                submit_bio(writing | REQ_SYNC, bio);
+
+        return 0;
+}
+
+/**
+ * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
+ *
+ * @writing: Whether reading or writing.
+ * @bdev: The block device which we're using.
+ * @block0: The first sector we're reading or writing.
+ * @page: The page on which I/O is being done.
+ * @is_readahead: Whether the page is being used for readahead (it will be
+ *                added to the readahead list).
+ * @syncio: Whether the i/o is being done synchronously.
+ * @free_group: If writing, the group to pass to the bio completion routine
+ *              for freeing the page.
+ *
+ * Prepare and start a read or write operation.
+ *
+ * Note that we always work with our own page. If writing, we might be given a
+ * compression buffer that will immediately be used to start compressing the
+ * next page. For reading, we do readahead and therefore don't know the final
+ * address where the data needs to go.
+ **/
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+        struct page *page, int is_readahead, int syncio, int free_group)
+{
+        page->private = 0;
+
+        /* Do here so we don't race against toi_bio_get_next_page_read */
+        lock_page(page);
+
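+        /*
+         * Readahead pages are chained through page->private:
+         * readahead_list_head is the oldest outstanding page and
+         * readahead_list_tail the newest, so toi_bio_get_next_page_read
+         * can consume pages in the order they were submitted.
+         */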
+        if (is_readahead) {
+                if (readahead_list_head)
+                        readahead_list_tail->private = (unsigned long) page;
+                else
+                        readahead_list_head = page;
+
+                readahead_list_tail = page;
+        }
+
+        /* Done before submitting to avoid races. */
+        if (syncio)
+                waiting_on = page;
+
+        /* Submit the page */
+        get_page(page);
+
+        if (submit(writing, bdev, block0, page, free_group))
+                return -EFAULT;
+
+        if (syncio)
+                do_bio_wait(2);
+
+        return 0;
+}
+
+/**
+ * toi_bdev_page_io - simpler interface for doing i/o directly on a single page
+ * @writing: Whether reading or writing.
+ * @bdev: Block device on which we're operating.
+ * @pos: Sector at which page to read or write starts.
+ * @page: Page to be read/written.
+ *
+ * A simple interface to submit a page of I/O and wait for its completion.
+ * The caller must free the page used.
+ **/
+static int toi_bdev_page_io(int writing, struct block_device *bdev,
+                long pos, struct page *page)
+{
+        return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
+}
+
+/**
+ * toi_bio_memory_needed - report the amount of memory needed for block i/o
+ *
+ * We want to have at least enough memory so as to have target_outstanding_io
+ * or more transactions on the fly at once. If we can do more, fine.
+ **/
+static int toi_bio_memory_needed(void)
+{
+        return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
+                                sizeof(struct bio));
+}
+
+/**
+ * toi_bio_print_debug_stats - put out debugging info in the buffer provided
+ * @buffer: A buffer of size @size into which text should be placed.
+ * @size: The size of @buffer.
+ *
+ * Fill a buffer with debugging info. This is used both for our debug_info
+ * sysfs entry and for recording the same info in dmesg.
+ **/
+static int toi_bio_print_debug_stats(char *buffer, int size)
+{
+        int len = 0;
+
+        if (toiActiveAllocator != &toi_blockwriter_ops) {
+                len = scnprintf(buffer, size,
+                                "- Block I/O inactive.\n");
+                return len;
+        }
+
+        len = scnprintf(buffer, size, "- Block I/O active.\n");
+
+        len += toi_bio_chains_debug_info(buffer + len, size - len);
+
+        len += scnprintf(buffer + len, size - len,
+                        "- Max outstanding reads %d. Max writes %d.\n",
+                        max_outstanding_reads, max_outstanding_writes);
+
+        len += scnprintf(buffer + len, size - len,
+                "  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
+                target_outstanding_io,
+                PAGE_SIZE, (unsigned int) sizeof(struct request),
+                (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
+
+#ifdef MEASURE_MUTEX_CONTENTION
+        {
+        int i;
+
+        len += scnprintf(buffer + len, size - len,
+                "  Mutex contention while reading:\n  Contended      Free\n");
+
+        for_each_online_cpu(i)
+                len += scnprintf(buffer + len, size - len,
+                "  %9lu %9lu\n",
+                mutex_times[0][0][i], mutex_times[0][1][i]);
+
+        len += scnprintf(buffer + len, size - len,
+                "  Mutex contention while writing:\n  Contended      Free\n");
+
+        for_each_online_cpu(i)
+                len += scnprintf(buffer + len, size - len,
+                "  %9lu %9lu\n",
+                mutex_times[1][0][i], mutex_times[1][1][i]);
+
+        }
+#endif
+
+        return len + scnprintf(buffer + len, size - len,
+                "  Free mem throttle point reached %d.\n", free_mem_throttle);
+}
+
+static int total_header_bytes;
+static int unowned;
+
+void debug_broken_header(void)
+{
+        printk(KERN_DEBUG "Image header too big for size allocated!\n");
+        print_toi_header_storage_for_modules();
+        printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed());
+        printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header));
+        printk(KERN_DEBUG "Total unowned : %d.\n", unowned);
+        printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes,
+                        DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
+        printk(KERN_DEBUG "Space needed now : %ld.\n",
+                        get_header_storage_needed());
+        dump_block_chains();
+        abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
+}
+
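+/*
+ * Incremental pagesets for a stream are chained on disk: each pageset
+ * begins with a struct toi_incremental_image_pointer (initially blank),
+ * and when a new pageset is started for the same stream, the pointer at
+ * the start of the previous pageset (toi_inc_ptr[stream][0]) is rewritten
+ * to point at the new one (toi_inc_ptr[stream][1]).
+ */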
+static int toi_bio_update_previous_inc_img_ptr(int stream)
+{
+        int result;
+        char *buffer = (char *) toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
+        struct page *page;
+        struct toi_incremental_image_pointer *prev, *this;
+
+        prev = &toi_inc_ptr[stream][0];
+        this = &toi_inc_ptr[stream][1];
+
+        if (!buffer) {
+                /*
+                 * We're at the start of writing a pageset. Memory should
+                 * not be that scarce.
+                 */
+                return -ENOMEM;
+        }
+
+        page = virt_to_page(buffer);
+        result = toi_do_io(READ, prev->bdev, prev->block, page, 0, 1, 0);
+
+        if (result)
+                goto out;
+
+        memcpy(buffer, (char *) this, sizeof(this->save));
+
+        result = toi_do_io(WRITE, prev->bdev, prev->block, page, 0, 0, 12);
+
+        /*
+         * If the IO is successfully submitted (!result), the page will be
+         * freed asynchronously on completion.
+         */
+out:
+        if (result)
+                toi__free_page(12, virt_to_page(buffer));
+        return result;
+}
+
+/**
+ * toi_write_init_incremental - incremental image part of setting up to write a new section
+ */
+static int toi_write_init_incremental(int stream)
+{
+        int result = 0;
+
+        /* Remember the location of this block so we can link to it. */
+        toi_bio_store_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+        /*
+         * Update the pointer at the start of the last pageset with the
+         * same stream number.
+         */
+        result = toi_bio_update_previous_inc_img_ptr(stream);
+        if (result)
+                return result;
+
+        /* Move the current to the previous slot. */
+        memcpy(&toi_inc_ptr[stream][0], &toi_inc_ptr[stream][1],
+                        sizeof(toi_inc_ptr[stream][1]));
+
+        /* Store a blank pointer at the start of this incremental pageset. */
+        memset(&toi_inc_ptr[stream][1], 0, sizeof(toi_inc_ptr[stream][1]));
+        result = toi_rw_buffer(WRITE, (char *) &toi_inc_ptr[stream][1],
+                        sizeof(toi_inc_ptr[stream][1]), 0);
+        if (result)
+                return result;
+
+        /* Serialise extent chains if this is an incremental pageset. */
+        return toi_serialise_extent_chains();
+}
+
+/**
+ * toi_read_init_incremental - incremental image part of setting up to read a new section
+ */
+static int toi_read_init_incremental(int stream)
+{
+        int result;
+
+        /* Set our position to the start of the next pageset. */
+        toi_bio_restore_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+        /* Read the start of the next incremental pageset (if any). */
+        result = toi_rw_buffer(READ, (char *) &toi_inc_ptr[stream][1],
+                        sizeof(toi_inc_ptr[stream][1]), 0);
+
+        if (!result)
+                result = toi_load_extent_chains();
+
+        return result;
+}
+
+/**
+ * toi_rw_init - prepare to read or write a stream in the image
+ * @writing: Whether reading or writing.
+ * @stream_number: Section of the image being processed.
+ *
+ * Prepare to read or write a section ('stream') in the image.
+ **/
+static int toi_rw_init(int writing, int stream_number)
+{
+        if (stream_number)
+                toi_extent_state_restore(stream_number);
+        else
+                toi_extent_state_goto_start();
+
+        if (writing) {
+                reset_idx = 0;
+                if (!current_stream)
+                        page_idx = 0;
+        } else {
+                reset_idx = 1;
+        }
+
+        atomic_set(&toi_io_done, 0);
+        if (!toi_writer_buffer)
+                toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+                                TOI_ATOMIC_GFP);
+        toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
+
+        current_stream = stream_number;
+
+        more_readahead = 1;
+
+        if (test_result_state(TOI_KEPT_IMAGE)) {
+                int result;
+
+                if (writing)
+                        result = toi_write_init_incremental(stream_number);
+                else
+                        result = toi_read_init_incremental(stream_number);
+
+                if (result)
+                        return result;
+        }
+
+        return toi_writer_buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * toi_bio_queue_write - queue a page for writing
+ * @full_buffer: Pointer to a page to be queued
+ *
+ * Add a page to the queue to be submitted. If we're the queue flusher,
+ * we'll do this once we've dropped toi_bio_mutex, so other threads can
+ * continue to submit I/O while we're on the slow path doing the actual
+ * submission.
+ **/
+static void toi_bio_queue_write(char **full_buffer)
+{
+        struct page *page = virt_to_page(*full_buffer);
+        unsigned long flags;
+
+        *full_buffer = NULL;
+        page->private = 0;
+
+        spin_lock_irqsave(&bio_queue_lock, flags);
+        if (!bio_queue_head)
+                bio_queue_head = page;
+        else
+                bio_queue_tail->private = (unsigned long) page;
+
+        bio_queue_tail = page;
+        atomic_inc(&toi_bio_queue_size);
+
+        spin_unlock_irqrestore(&bio_queue_lock, flags);
+        wake_up(&toi_io_queue_flusher);
+}
+
+/**
+ * toi_rw_cleanup - Cleanup after i/o.
+ * @writing: Whether we were reading or writing.
+ *
+ * Flush all I/O and clean everything up after reading or writing a
+ * section of the image.
+ **/
+static int toi_rw_cleanup(int writing)
+{
+        int i, result = 0;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_rw_cleanup.");
+        if (writing) {
+                if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
+                        toi_bio_queue_write(&toi_writer_buffer);
+
+                while (bio_queue_head && !result)
+                        result = toi_bio_queue_flush_pages(0);
+
+                if (result)
+                        return result;
+
+                if (current_stream == 2)
+                        toi_extent_state_save(1);
+                else if (current_stream == 1)
+                        toi_extent_state_save(3);
+        }
+
+        result = toi_finish_all_io();
+
+        while (readahead_list_head) {
+                void *next = (void *) readahead_list_head->private;
+                toi__free_page(12, readahead_list_head);
+                readahead_list_head = next;
+        }
+
+        readahead_list_tail = NULL;
+
+        if (!current_stream)
+                return result;
+
+        for (i = 0; i < NUM_REASONS; i++) {
+                if (!atomic_read(&reasons[i]))
+                        continue;
+                printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n",
+                                reason_name[i], atomic_read(&reasons[i]));
+                atomic_set(&reasons[i], 0);
+        }
+
+        current_stream = 0;
+        return result;
+}
+
+/**
+ * toi_start_one_readahead - start one page of readahead
+ * @dedicated_thread: Is this a thread dedicated to doing readahead?
+ *
+ * Start one new page of readahead. If this is being called by a thread
+ * whose only job is to submit readahead, don't quit because we failed
+ * to allocate a page.
+ **/
+static int toi_start_one_readahead(int dedicated_thread)
+{
+        char *buffer = NULL;
+        int oom = 0, result;
+
+        result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
+        if (result) {
+                printk(KERN_DEBUG "toi_start_one_readahead: throttle_if_needed"
+                                " returned %d.\n", result);
+                return result;
+        }
+
+        mutex_lock(&toi_bio_readahead_mutex);
+
+        while (!buffer) {
+                buffer = (char *) toi_get_zeroed_page(12,
+                                TOI_ATOMIC_GFP);
+                if (!buffer) {
+                        if (oom && !dedicated_thread) {
+                                mutex_unlock(&toi_bio_readahead_mutex);
+                                printk(KERN_DEBUG "toi_start_one_readahead: "
+                                        "out of memory and not a dedicated "
+                                        "thread - giving up.\n");
+                                return -ENOMEM;
+                        }
+
+                        oom = 1;
+                        set_free_mem_throttle();
+                        do_bio_wait(5);
+                }
+        }
+
+        result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
+        if (result == -ENOSPC)
+                toi__free_page(12, virt_to_page(buffer));
+        mutex_unlock(&toi_bio_readahead_mutex);
+        if (result) {
+                if (result == -ENOSPC)
+                        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+                                        "Last readahead page submitted.");
+                else
+                        printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n",
+                                        result);
+        }
+        return result;
+}
+
+/**
+ * toi_start_new_readahead - start new readahead
+ * @dedicated_thread: Are we dedicated to this task?
+ *
+ * Start readahead of image pages.
+ *
+ * We can be called as a thread dedicated to this task (may be helpful on
+ * systems with lots of CPUs), in which case we don't exit until there's no
+ * more readahead.
+ *
+ * If this is not called by a dedicated thread, we top up our queue until
+ * there's no more readahead to submit, until we've submitted the number
+ * given in target_outstanding_io, or until the number in progress reaches
+ * the target outstanding I/O value.
+ *
+ * No mutex needed because this is only ever called by the first cpu.
+ **/
+static int toi_start_new_readahead(int dedicated_thread)
+{
+        int last_result, num_submitted = 0;
+
+        /* Start a new readahead? */
+        if (!more_readahead)
+                return 0;
+
+        do {
+                last_result = toi_start_one_readahead(dedicated_thread);
+
+                if (last_result) {
+                        if (last_result == -ENOMEM || last_result == -ENOSPC)
+                                return 0;
+
+                        printk(KERN_DEBUG
+                                "Begin read chunk returned %d.\n",
+                                last_result);
+                } else
+                        num_submitted++;
+
+        } while (more_readahead && !last_result &&
+                 (dedicated_thread ||
+                  (num_submitted < target_outstanding_io &&
+                   atomic_read(&toi_io_in_progress) < target_outstanding_io)));
+
+        return last_result;
+}
+
+/**
+ * bio_io_flusher - start the dedicated I/O flushing routine
+ * @writing: Whether we're writing the image.
+ **/
+static int bio_io_flusher(int writing)
+{
+        if (writing)
+                return toi_bio_queue_flush_pages(1);
+        else
+                return toi_start_new_readahead(1);
+}
+
+/**
+ * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
+ * @no_readahead: Whether we can use readahead
+ *
+ * Read a page from disk, submitting readahead and cleaning up finished i/o
+ * while we wait for the page we're after.
+ **/
+static int toi_bio_get_next_page_read(int no_readahead)
+{
+        char *virt;
+        struct page *old_readahead_list_head;
+
+        /*
+         * When reading the second page of the header, we have to
+         * delay submitting the read until after we've gotten the
+         * extents out of the first page.
+         */
+        if (unlikely(no_readahead)) {
+                int result = toi_start_one_readahead(0);
+
+                if (result) {
+                        printk(KERN_EMERG "No readahead and "
+                                "toi_start_one_readahead returned non-zero.\n");
+                        return -EIO;
+                }
+        }
+
+        if (unlikely(!readahead_list_head)) {
+                /*
+                 * If the last page finishes exactly on the page
+                 * boundary, we will be called one extra time and
+                 * have no data to return. In this case, we should
+                 * not BUG(), like we used to!
+                 */
+                if (!more_readahead) {
+                        printk(KERN_EMERG "No more readahead.\n");
+                        return -ENOSPC;
+                }
+                if (unlikely(toi_start_one_readahead(0))) {
+                        printk(KERN_EMERG "No readahead and "
+                         "toi_start_one_readahead returned non-zero.\n");
+                        return -EIO;
+                }
+        }
+
+        if (PageLocked(readahead_list_head)) {
+                waiting_on = readahead_list_head;
+                do_bio_wait(0);
+        }
+
+        virt = page_address(readahead_list_head);
+        memcpy(toi_writer_buffer, virt, PAGE_SIZE);
+
+        mutex_lock(&toi_bio_readahead_mutex);
+        old_readahead_list_head = readahead_list_head;
+        readahead_list_head = (struct page *) readahead_list_head->private;
+        mutex_unlock(&toi_bio_readahead_mutex);
+        toi__free_page(12, old_readahead_list_head);
+        return 0;
+}
+
+/**
+ * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
+ * @dedicated_thread: Whether we're a dedicated thread
+ *
+ * Flush the queue of pages ready to be written to disk.
+ *
+ * If we're a dedicated thread, stay in here until told to leave,
+ * sleeping in wait_event.
+ *
+ * The first thread is normally the only one to come in here. Another
+ * thread can enter this routine too, though, via throttle_if_needed.
+ * Since that's the case, we must be careful to only have one thread
+ * doing this work at a time. Otherwise we have a race and could save
+ * pages out of order.
+ *
+ * If an error occurs, free all remaining pages without submitting them
+ * for I/O.
+ **/
+
+int toi_bio_queue_flush_pages(int dedicated_thread)
+{
+        unsigned long flags;
+        int result = 0;
+        static DEFINE_MUTEX(busy);
+
+        if (!mutex_trylock(&busy))
+                return 0;
+
+top:
+        spin_lock_irqsave(&bio_queue_lock, flags);
+        while (bio_queue_head) {
+                struct page *page = bio_queue_head;
+                bio_queue_head = (struct page *) page->private;
+                if (bio_queue_tail == page)
+                        bio_queue_tail = NULL;
+                atomic_dec(&toi_bio_queue_size);
+                spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+                /* Don't generate more error messages if already had one */
+                if (!result)
+                        result = toi_bio_rw_page(WRITE, page, 0, 11);
+                /*
+                 * If writing the page failed, don't drop out.
+                 * Flush the rest of the queue too.
+                 */
+                if (result)
+                        toi__free_page(11 , page);
+                spin_lock_irqsave(&bio_queue_lock, flags);
+        }
+        spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+        if (dedicated_thread) {
+                wait_event(toi_io_queue_flusher, bio_queue_head ||
+                                toi_bio_queue_flusher_should_finish);
+                if (likely(!toi_bio_queue_flusher_should_finish))
+                        goto top;
+                toi_bio_queue_flusher_should_finish = 0;
+        }
+
+        mutex_unlock(&busy);
+        return result;
+}
+
+/**
+ * toi_bio_get_new_page - get a new page for I/O
+ * @full_buffer: Pointer to a page to allocate.
+ **/
+static int toi_bio_get_new_page(char **full_buffer)
+{
+        int result = throttle_if_needed(THROTTLE_WAIT);
+        if (result)
+                return result;
+
+        while (!*full_buffer) {
+                *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+                if (!*full_buffer) {
+                        set_free_mem_throttle();
+                        do_bio_wait(3);
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
+ * @writing:                Bool - whether writing (or reading).
+ * @buffer:                The start of the buffer to write or fill.
+ * @buffer_size:        The size of the buffer to write or fill.
+ * @no_readahead:        Don't try to start readahead (when getting extents).
+ **/
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+                int no_readahead)
+{
+        int bytes_left = buffer_size, result = 0;
+
+        while (bytes_left) {
+                char *source_start = buffer + buffer_size - bytes_left;
+                char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
+                int capacity = PAGE_SIZE - toi_writer_buffer_posn;
+                char *to = writing ? dest_start : source_start;
+                char *from = writing ? source_start : dest_start;
+
+                if (bytes_left <= capacity) {
+                        memcpy(to, from, bytes_left);
+                        toi_writer_buffer_posn += bytes_left;
+                        return 0;
+                }
+
+                /* Complete this page and start a new one */
+                memcpy(to, from, capacity);
+                bytes_left -= capacity;
+
+                if (!writing) {
+                        /*
+                         * Perform actual I/O:
+                         * read readahead_list_head into toi_writer_buffer
+                         */
+                        int result = toi_bio_get_next_page_read(no_readahead);
+                        if (result && bytes_left) {
+                                printk(KERN_ERR "toi_bio_get_next_page_read "
+                                        "returned %d. Expecting to read %d "
+                                        "bytes.\n", result, bytes_left);
+                                return result;
+                        }
+                } else {
+                        toi_bio_queue_write(&toi_writer_buffer);
+                        result = toi_bio_get_new_page(&toi_writer_buffer);
+                        if (result) {
+                                printk(KERN_ERR "toi_bio_get_new_page returned "
+                                                "%d.\n", result);
+                                return result;
+                        }
+                }
+
+                toi_writer_buffer_posn = 0;
+                toi_cond_pause(0, NULL);
+        }
+
+        return 0;
+}
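+
+/*
+ * A worked example of the buffering in toi_rw_buffer() (assuming 4096-byte
+ * pages): with toi_writer_buffer_posn at 4090, writing a 12-byte chunk
+ * copies 6 bytes to fill the current page, queues that page for writing,
+ * grabs a fresh page and copies the remaining 6 bytes at offset 0, leaving
+ * toi_writer_buffer_posn at 6.
+ */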
+
+/**
+ * toi_bio_read_page - read a page of the image
+ * @pfn:                Returns the pfn to which the data belongs.
+ * @buf_type:                How @buffer_page should be mapped (TOI_MAP/TOI_UNMAP).
+ * @buffer_page:        The page to fill with the (possibly compressed) data.
+ * @buf_size:                Returns the number of bytes of @buffer_page used.
+ *
+ * Read a (possibly compressed) page from the image, into buffer_page,
+ * returning its pfn and the buffer size.
+ **/
+static int toi_bio_read_page(unsigned long *pfn, int buf_type,
+                void *buffer_page, unsigned int *buf_size)
+{
+        int result = 0;
+        int this_idx;
+        char *buffer_virt = TOI_MAP(buf_type, buffer_page);
+
+        /*
+         * Only call start_new_readahead if we don't have a dedicated thread
+         * and we're the queue flusher.
+         */
+        if (current == toi_queue_flusher && more_readahead &&
+                        !test_action_state(TOI_NO_READAHEAD)) {
+                int result2 = toi_start_new_readahead(0);
+                if (result2) {
+                        printk(KERN_DEBUG "Queue flusher and "
+                         "toi_start_one_readahead returned non-zero.\n");
+                        result = -EIO;
+                        goto out;
+                }
+        }
+
+        my_mutex_lock(0, &toi_bio_mutex);
+
+        /*
+         * Structure in the image:
+         *        [page index|destination pfn|page size|page data]
+         * buf_size is PAGE_SIZE
+         * We can validly find there's nothing to read in a multithreaded
+         * situation.
+         */
+        if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) ||
+            toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
+            toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
+            toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
+                result = -ENODATA;
+                goto out_unlock;
+        }
+
+        if (reset_idx) {
+                page_idx = this_idx;
+                reset_idx = 0;
+        } else {
+                page_idx++;
+                if (!this_idx)
+                        result = -ENODATA;
+                else if (page_idx != this_idx)
+                        printk(KERN_ERR "Got page index %d, expected %d.\n",
+                                        this_idx, page_idx);
+        }
+
+out_unlock:
+        my_mutex_unlock(0, &toi_bio_mutex);
+out:
+        TOI_UNMAP(buf_type, buffer_page);
+        return result;
+}
+
+/**
+ * toi_bio_write_page - write a page of the image
+ * @pfn:                The pfn where the data belongs.
+ * @buf_type:        How @buffer_page should be mapped (TOI_MAP/TOI_UNMAP).
+ * @buffer_page:        The page containing the (possibly compressed) data.
+ * @buf_size:        The number of bytes on @buffer_page used.
+ *
+ * Write a (possibly compressed) page to the image from the buffer, together
+ * with its index and buffer size.
+ **/
+static int toi_bio_write_page(unsigned long pfn, int buf_type,
+                void *buffer_page, unsigned int buf_size)
+{
+        char *buffer_virt;
+        int result = 0, result2 = 0;
+
+        if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
+                return 0;
+
+        my_mutex_lock(1, &toi_bio_mutex);
+
+        if (test_result_state(TOI_ABORTED)) {
+                my_mutex_unlock(1, &toi_bio_mutex);
+                return 0;
+        }
+
+        buffer_virt = TOI_MAP(buf_type, buffer_page);
+        page_idx++;
+
+        /*
+         * Structure in the image:
+         *        [page index|destination pfn|page size|page data]
+         * buf_size is PAGE_SIZE
+         */
+        if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) ||
+            toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
+            toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
+            toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
+                printk(KERN_DEBUG "toi_rw_buffer returned non-zero to "
+                                "toi_bio_write_page.\n");
+                result = -EIO;
+        }
+
+        TOI_UNMAP(buf_type, buffer_page);
+        my_mutex_unlock(1, &toi_bio_mutex);
+
+        if (current == toi_queue_flusher)
+                result2 = toi_bio_queue_flush_pages(0);
+
+        return result ? result : result2;
+}
+
+/**
+ * _toi_rw_header_chunk - read or write a portion of the image header
+ * @writing:                Whether reading or writing.
+ * @owner:                The module for which we're writing.
+ *                        Used for confirming that modules
+ *                        don't use more header space than they asked for.
+ * @buffer:                Address of the data to write.
+ * @buffer_size:        Size of the data buffer.
+ * @no_readahead:        Don't try to start readahead (when getting extents).
+ *
+ * Perform PAGE_SIZE I/O. Start readahead if needed.
+ **/
+static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+                char *buffer, int buffer_size, int no_readahead)
+{
+        int result = 0;
+
+        if (owner) {
+                owner->header_used += buffer_size;
+                toi_message(TOI_HEADER, TOI_LOW, 1,
+                        "Header: %s : %d bytes (%d/%d) from offset %d.",
+                        owner->name,
+                        buffer_size, owner->header_used,
+                        owner->header_requested,
+                        toi_writer_buffer_posn);
+                if (owner->header_used > owner->header_requested && writing) {
+                        printk(KERN_EMERG "TuxOnIce module %s is using more "
+                                "header space (%u) than it requested (%u).\n",
+                                owner->name,
+                                owner->header_used,
+                                owner->header_requested);
+                        return buffer_size;
+                }
+        } else {
+                unowned += buffer_size;
+                toi_message(TOI_HEADER, TOI_LOW, 1,
+                        "Header: (No owner): %d bytes (%d total so far) from "
+                        "offset %d.", buffer_size, unowned,
+                        toi_writer_buffer_posn);
+        }
+
+        if (!writing && !no_readahead && more_readahead) {
+                result = toi_start_new_readahead(0);
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Start new readahead "
+                                "returned %d.", result);
+        }
+
+        if (!result) {
+                result = toi_rw_buffer(writing, buffer, buffer_size,
+                                no_readahead);
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "rw_buffer returned "
+                                "%d.", result);
+        }
+
+        total_header_bytes += buffer_size;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning "
+                        "%d.", result);
+        return result;
+}
+
+static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+                char *buffer, int size)
+{
+        return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+static int toi_rw_header_chunk_noreadahead(int writing,
+                struct toi_module_ops *owner, char *buffer, int size)
+{
+        return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+/**
+ * toi_bio_storage_needed - get the amount of storage needed for my fns
+ **/
+static int toi_bio_storage_needed(void)
+{
+        return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed();
+}
+
+/**
+ * toi_bio_save_config_info - save block I/O config to image header
+ * @buf:        PAGE_SIZE'd buffer into which data should be saved.
+ **/
+static int toi_bio_save_config_info(char *buf)
+{
+        int *ints = (int *) buf;
+        ints[0] = target_outstanding_io;
+        return sizeof(int);
+}
+
+/**
+ * toi_bio_load_config_info - restore block I/O config
+ * @buf:        Data to be reloaded.
+ * @size:        Size of the buffer saved.
+ **/
+static void toi_bio_load_config_info(char *buf, int size)
+{
+        int *ints = (int *) buf;
+        target_outstanding_io  = ints[0];
+}
+
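+/*
+ * resume_block_device is shared; open_resume_dev_t() and close_resume_dev_t()
+ * keep a simple count in resume_bdev_open_count so that one user closing it
+ * doesn't pull it out from under another. Passing force discards any
+ * existing count.
+ */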
+void close_resume_dev_t(int force)
+{
+        if (!resume_block_device)
+                return;
+
+        if (force)
+                atomic_set(&resume_bdev_open_count, 0);
+        else
+                atomic_dec(&resume_bdev_open_count);
+
+        if (!atomic_read(&resume_bdev_open_count)) {
+                toi_close_bdev(resume_block_device);
+                resume_block_device = NULL;
+        }
+}
+
+int open_resume_dev_t(int force, int quiet)
+{
+        if (force) {
+                close_resume_dev_t(1);
+                atomic_set(&resume_bdev_open_count, 1);
+        } else
+                atomic_inc(&resume_bdev_open_count);
+
+        if (resume_block_device)
+                return 0;
+
+        resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0);
+        if (IS_ERR(resume_block_device)) {
+                if (!quiet)
+                        toi_early_boot_message(1, TOI_CONTINUE_REQ,
+                                "Failed to open device %x, where"
+                                " the header should be found.",
+                                resume_dev_t);
+                resume_block_device = NULL;
+                atomic_set(&resume_bdev_open_count, 0);
+                return 1;
+        }
+
+        return 0;
+}
+
+/**
+ * toi_bio_initialise - initialise bio code at start of some action
+ * @starting_cycle:        Whether starting a hibernation cycle, or just reading or
+ *                        writing a sysfs value.
+ **/
+static int toi_bio_initialise(int starting_cycle)
+{
+        int result;
+
+        if (!starting_cycle || !resume_dev_t)
+                return 0;
+
+        max_outstanding_writes = 0;
+        max_outstanding_reads = 0;
+        current_stream = 0;
+        toi_queue_flusher = current;
+#ifdef MEASURE_MUTEX_CONTENTION
+        {
+                int i, j, k;
+
+                for (i = 0; i < 2; i++)
+                        for (j = 0; j < 2; j++)
+                                for_each_online_cpu(k)
+                                        mutex_times[i][j][k] = 0;
+        }
+#endif
+        result = open_resume_dev_t(0, 1);
+
+        if (result)
+                return result;
+
+        return get_signature_page();
+}
+
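+/*
+ * raw_to_real() below converts raw pages of storage into the number of
+ * data pages they can hold once per-page metadata overhead
+ * (sizeof(unsigned long) + sizeof(int) bytes per page, rounded up to whole
+ * pages) is taken out. Worked example (assuming 4096-byte pages and a
+ * 64-bit build, so 12 bytes of overhead per page): for raw = 1000,
+ * extra = (1000 * 12 + 4109) / 4108 = 3, so roughly 997 data pages fit.
+ */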
+static unsigned long raw_to_real(unsigned long raw)
+{
+        unsigned long extra;
+
+        extra = (raw * (sizeof(unsigned long) + sizeof(int)) +
+                (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
+                (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
+
+        return raw > extra ? raw - extra : 0;
+}
+
+static unsigned long toi_bio_storage_available(void)
+{
+        unsigned long sum = 0;
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    this_module->type != BIO_ALLOCATOR_MODULE)
+                        continue;
+                toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking storage "
+                                "available from %s.", this_module->name);
+                sum += this_module->bio_allocator_ops->storage_available();
+        }
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Total storage available is %lu "
+                        "pages (%d header pages).", sum, header_pages_reserved);
+
+        return sum > header_pages_reserved ?
+                raw_to_real(sum - header_pages_reserved) : 0;
+}
+
+static unsigned long toi_bio_storage_allocated(void)
+{
+        return raw_pages_allocd > header_pages_reserved ?
+                raw_to_real(raw_pages_allocd - header_pages_reserved) : 0;
+}
+
+/*
+ * If we have read part of the image, we might have filled memory with
+ * data that should be zeroed out.
+ */
+static void toi_bio_noresume_reset(void)
+{
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_noresume_reset.");
+        toi_rw_cleanup(READ);
+        free_all_bdev_info();
+}
+
+/**
+ * toi_bio_cleanup - cleanup after some action
+ * @finishing_cycle:        Whether completing a cycle.
+ **/
+static void toi_bio_cleanup(int finishing_cycle)
+{
+        if (!finishing_cycle)
+                return;
+
+        if (toi_writer_buffer) {
+                toi_free_page(11, (unsigned long) toi_writer_buffer);
+                toi_writer_buffer = NULL;
+        }
+
+        forget_signature_page();
+
+        if (header_block_device && toi_sig_data &&
+                        toi_sig_data->header_dev_t != resume_dev_t)
+                toi_close_bdev(header_block_device);
+
+        header_block_device = NULL;
+
+        close_resume_dev_t(0);
+}
+
+static int toi_bio_write_header_init(void)
+{
+        int result;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_write_header_init");
+        toi_rw_init(WRITE, 0);
+        toi_writer_buffer_posn = 0;
+
+        /* Info needed to bootstrap goes at the start of the header.
+         * First we save the positions and devinfo, including the number
+         * of header pages. Then we save the structs containing data needed
+         * for reading the header pages back.
+         * Note that even if header pages take more than one page, when we
+         * read back the info, we will have restored the location of the
+         * next header page by the time we go to use it.
+         */
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise extent chains.");
+        result = toi_serialise_extent_chains();
+
+        if (result)
+                return result;
+
+        /*
+         * Signature page hasn't been modified at this point. Write it in
+         * the header so we can restore it later.
+         */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise signature page.");
+        return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops,
+                        (char *) toi_cur_sig_page,
+                        PAGE_SIZE);
+}
+
+static int toi_bio_write_header_cleanup(void)
+{
+        int result = 0;
+
+        if (toi_writer_buffer_posn)
+                toi_bio_queue_write(&toi_writer_buffer);
+
+        result = toi_finish_all_io();
+
+        unowned = 0;
+        total_header_bytes = 0;
+
+        /* Set signature to say we have an image */
+        if (!result)
+                result = toi_bio_mark_have_image();
+
+        return result;
+}
+
+/*
+ * toi_bio_read_header_init()
+ *
+ * Description:
+ * 1. Attempt to read the device specified with resume=.
+ * 2. Check the contents of the swap header for our signature.
+ * 3. Warn, ignore, reset and/or continue as appropriate.
+ * 4. If continuing, read the toi_swap configuration section
+ *    of the header and set up block device info so we can read
+ *    the rest of the header & image.
+ *
+ * Returns:
+ * May not return if the user chooses to reboot at a warning.
+ * -EINVAL if we cannot resume at this time. Booting should continue
+ * normally.
+ */
+
+static int toi_bio_read_header_init(void)
+{
+        int result = 0;
+        char buf[32];
+
+        toi_writer_buffer_posn = 0;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_init");
+
+        if (!toi_sig_data) {
+                printk(KERN_INFO "toi_bio_read_header_init called when we "
+                                "haven't verified there is an image!\n");
+                return -EINVAL;
+        }
+
+        /*
+         * If the header is not on the resume_swap_dev_t, get the resume device
+         * first.
+         */
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "Header dev_t is %lx.",
+                        toi_sig_data->header_dev_t);
+        if (toi_sig_data->have_uuid) {
+                struct fs_info seek;
+                dev_t device;
+
+                strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16);
+                seek.dev_t = toi_sig_data->header_dev_t;
+                seek.last_mount_size = 0;
+                device = blk_lookup_fs_info(&seek);
+                if (device) {
+                        printk("Using dev_t %s, returned by blk_lookup_fs_info.\n",
+                                        format_dev_t(buf, device));
+                        toi_sig_data->header_dev_t = device;
+                }
+        }
+        if (toi_sig_data->header_dev_t != resume_dev_t) {
+                header_block_device = toi_open_bdev(NULL,
+                                toi_sig_data->header_dev_t, 1);
+
+                if (IS_ERR(header_block_device))
+                        return PTR_ERR(header_block_device);
+        } else
+                header_block_device = resume_block_device;
+
+        if (!toi_writer_buffer)
+                toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+                                TOI_ATOMIC_GFP);
+        more_readahead = 1;
+
+        /*
+         * Read toi_swap configuration.
+         * Headerblock size taken into account already.
+         */
+        result = toi_bio_ops.bdev_page_io(READ, header_block_device,
+                        toi_sig_data->first_header_block,
+                        virt_to_page((unsigned long) toi_writer_buffer));
+        if (result)
+                return result;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "load extent chains.");
+        result = toi_load_extent_chains();
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "load original signature page.");
+        toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+        if (!toi_orig_sig_page) {
+                printk(KERN_ERR "Failed to allocate memory for the current"
+                        " image signature.\n");
+                return -ENOMEM;
+        }
+
+        return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops,
+                        (char *) toi_orig_sig_page,
+                        PAGE_SIZE);
+}
+
+static int toi_bio_read_header_cleanup(void)
+{
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup.");
+        return toi_rw_cleanup(READ);
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+/*
+ * UUID must be 32 chars long. It may have dashes, but nothing
+ * else.
+ */
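+/*
+ * For example (a made-up value), "UUID=3fa85f64-5717-4562-b3fc-2c963f66afa6"
+ * yields the sixteen raw bytes 3f a8 5f 64 57 17 45 62 b3 fc 2c 96 3f 66 af a6
+ * in the returned buffer, while "UUID=3fa85f64" is rejected because fewer
+ * than 32 hex digits are present.
+ */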
+char *uuid_from_commandline(char *commandline)
+{
+        int low = 0;
+        char *result = NULL, *output, *ptr;
+
+        if (strncmp(commandline, "UUID=", 5))
+                return NULL;
+
+        result = kzalloc(17, GFP_KERNEL);
+        if (!result) {
+                printk("Failed to kzalloc UUID text memory.\n");
+                return NULL;
+        }
+
+        ptr = commandline + 5;
+        output = result;
+
+        while (*ptr && (output - result) < 16) {
+                if (isxdigit(*ptr)) {
+                        int value = isdigit(*ptr) ? *ptr - '0' :
+                                TOLOWER(*ptr) - 'a' + 10;
+                        if (low) {
+                                *output += value;
+                                output++;
+                        } else {
+                                *output = value << 4;
+                        }
+                        low = !low;
+                } else if (*ptr != '-')
+                        break;
+                ptr++;
+        }
+
+        if ((output - result) < 16 || *ptr) {
+                printk(KERN_DEBUG "Found resume=UUID=, but the value looks "
+                                "invalid.\n");
+                kfree(result);
+                result = NULL;
+        }
+
+        return result;
+}
+
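+/*
+ * Run @command; if it didn't give us a resume_dev_t and we haven't yet
+ * waited for device probing to complete, wait and then retry @command once.
+ * Used below when translating the resume= parameter, since the device may
+ * not have been discovered yet this early in boot.
+ */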
+#define retry_if_fails(command) \
+do { \
+        command; \
+        if (!resume_dev_t && !waited_for_device_probe) { \
+                wait_for_device_probe(); \
+                command; \
+                waited_for_device_probe = 1; \
+        } \
+} while(0)
+
+/**
+ * try_to_open_resume_device: Try to parse and open resume=
+ *
+ * Any "swap:" has been stripped away and we just have the path to deal with.
+ * We attempt to do name_to_dev_t, open and stat the file. Having opened the
+ * file, get the struct block_device * to match.
+ */
+static int try_to_open_resume_device(char *commandline, int quiet)
+{
+        struct kstat stat;
+        int error = 0;
+        char *uuid = uuid_from_commandline(commandline);
+        int waited_for_device_probe = 0;
+
+        resume_dev_t = MKDEV(0, 0);
+
+        if (!strlen(commandline))
+                retry_if_fails(toi_bio_scan_for_image(quiet));
+
+        if (uuid) {
+                struct fs_info seek;
+                strncpy((char *) &seek.uuid, uuid, 16);
+                seek.dev_t = resume_dev_t;
+                seek.last_mount_size = 0;
+                retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek));
+                kfree(uuid);
+        }
+
+        if (!resume_dev_t)
+                retry_if_fails(resume_dev_t = name_to_dev_t(commandline));
+
+        if (!resume_dev_t) {
+                struct file *file = filp_open(commandline,
+                                O_RDONLY|O_LARGEFILE, 0);
+
+                if (!IS_ERR(file) && file) {
+                        vfs_getattr(&file->f_path, &stat);
+                        filp_close(file, NULL);
+                } else
+                        error = vfs_stat(commandline, &stat);
+                if (!error)
+                        resume_dev_t = stat.rdev;
+        }
+
+        if (!resume_dev_t) {
+                if (quiet)
+                        return 1;
+
+                if (test_toi_state(TOI_TRYING_TO_RESUME))
+                        toi_early_boot_message(1, toi_translate_err_default,
+                          "Failed to translate \"%s\" into a device id.\n",
+                          commandline);
+                else
+                        printk("TuxOnIce: Can't translate \"%s\" into a device "
+                                        "id yet.\n", commandline);
+                return 1;
+        }
+
+        return open_resume_dev_t(1, quiet);
+}
+
+/*
+ * Parse Image Location
+ *
+ * Attempt to parse a resume= parameter.
+ * Swap Writer accepts:
+ * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
+ *
+ * Where:
+ * DEVNAME is convertible to a dev_t by name_to_dev_t
+ * FIRSTBLOCK is the location of the first block in the swap file
+ * (specifying one for a swap partition is nonsensical but not prohibited).
+ * Data is validated by attempting to read a swap header from the
+ * location given. Failure will result in toi_swap refusing to
+ * save an image, and a reboot with correct parameters will be
+ * necessary.
+ */
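+/*
+ * Example values (device names purely illustrative):
+ *   resume=swap:/dev/sda2        - use the swap partition on /dev/sda2
+ *   resume=file:/dev/sdb1:0x2000 - swap file on /dev/sdb1, first block 0x2000
+ *   resume=UUID=<32 hex digits>  - resolve the device by filesystem UUID
+ *   resume=                      - scan the available devices for an image
+ */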
+static int toi_bio_parse_sig_location(char *commandline,
+                int only_allocator, int quiet)
+{
+        char *thischar, *devstart, *colon = NULL;
+        int signature_found, result = -EINVAL, temp_result = 0;
+
+        if (strncmp(commandline, "swap:", 5) &&
+            strncmp(commandline, "file:", 5)) {
+                /*
+                 * Failing swap:, we'll take a simple resume=/dev/hda2, or a
+                 * blank value (scan) but fall through to other allocators
+                 * if /dev/ or UUID= isn't matched.
+                 */
+                if (strncmp(commandline, "/dev/", 5) &&
+                    strncmp(commandline, "UUID=", 5) &&
+                    strlen(commandline))
+                        return 1;
+        } else
+                commandline += 5;
+
+        devstart = commandline;
+        thischar = commandline;
+        while ((*thischar != ':') && (*thischar != '@') &&
+                ((thischar - commandline) < 250) && (*thischar))
+                thischar++;
+
+        if (*thischar == ':') {
+                colon = thischar;
+                *colon = 0;
+                thischar++;
+        }
+
+        while ((thischar - commandline) < 250 && *thischar)
+                thischar++;
+
+        if (colon) {
+                unsigned long block;
+                temp_result = kstrtoul(colon + 1, 0, &block);
+                if (!temp_result)
+                        resume_firstblock = (int) block;
+        } else
+                resume_firstblock = 0;
+
+        clear_toi_state(TOI_CAN_HIBERNATE);
+        clear_toi_state(TOI_CAN_RESUME);
+
+        if (!temp_result)
+                temp_result = try_to_open_resume_device(devstart, quiet);
+
+        if (colon)
+                *colon = ':';
+
+        /* No error if we only scanned */
+        if (temp_result)
+                return strlen(commandline) ? -EINVAL : 1;
+
+        signature_found = toi_bio_image_exists(quiet);
+
+        if (signature_found != -1) {
+                result = 0;
+                /*
+                 * TODO: If only file storage, CAN_HIBERNATE should only be
+                 * set if file allocator's target is valid.
+                 */
+                set_toi_state(TOI_CAN_HIBERNATE);
+                set_toi_state(TOI_CAN_RESUME);
+        } else
+                if (!quiet)
+                        printk(KERN_ERR "TuxOnIce: Block I/O: No "
+                                "signature found at %s.\n", devstart);
+
+        return result;
+}
+
+static void toi_bio_release_storage(void)
+{
+        header_pages_reserved = 0;
+        raw_pages_allocd = 0;
+
+        free_all_bdev_info();
+}
+
+/*
+ * toi_bio_remove_image - restore the original signature and release storage
+ */
+static int toi_bio_remove_image(void)
+{
+        int result;
+
+        toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_remove_image.");
+
+        result = toi_bio_restore_original_signature();
+
+        /*
+         * We don't do a sanity check here: we want to restore the swap
+         * whatever version of the kernel made the hibernate image.
+         *
+         * We need to write swap, but swap may not be enabled, so
+         * we write the device directly.
+         *
+         * If we don't have a current_signature_page, we didn't
+         * read an image header, so don't change anything.
+         */
+
+        toi_bio_release_storage();
+
+        return result;
+}
+
+struct toi_bio_ops toi_bio_ops = {
+        .bdev_page_io = toi_bdev_page_io,
+        .register_storage = toi_register_storage_chain,
+        .free_storage = toi_bio_release_storage,
+};
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
+                        0, 16384, 0, NULL),
+};
+
+struct toi_module_ops toi_blockwriter_ops = {
+        .type                                = WRITER_MODULE,
+        .name                                = "block i/o",
+        .directory                        = "block_io",
+        .module                                = THIS_MODULE,
+        .memory_needed                        = toi_bio_memory_needed,
+        .print_debug_info                = toi_bio_print_debug_stats,
+        .storage_needed                        = toi_bio_storage_needed,
+        .save_config_info                = toi_bio_save_config_info,
+        .load_config_info                = toi_bio_load_config_info,
+        .initialise                        = toi_bio_initialise,
+        .cleanup                        = toi_bio_cleanup,
+        .post_atomic_restore                = toi_bio_chains_post_atomic,
+
+        .rw_init                        = toi_rw_init,
+        .rw_cleanup                        = toi_rw_cleanup,
+        .read_page                        = toi_bio_read_page,
+        .write_page                        = toi_bio_write_page,
+        .rw_header_chunk                = toi_rw_header_chunk,
+        .rw_header_chunk_noreadahead        = toi_rw_header_chunk_noreadahead,
+        .io_flusher                        = bio_io_flusher,
+        .update_throughput_throttle        = update_throughput_throttle,
+        .finish_all_io                        = toi_finish_all_io,
+
+        .noresume_reset                        = toi_bio_noresume_reset,
+        .storage_available                 = toi_bio_storage_available,
+        .storage_allocated                = toi_bio_storage_allocated,
+        .reserve_header_space                = toi_bio_reserve_header_space,
+        .allocate_storage                = toi_bio_allocate_storage,
+        .free_unused_storage            = toi_bio_free_unused_storage,
+        .image_exists                        = toi_bio_image_exists,
+        .mark_resume_attempted                = toi_bio_mark_resume_attempted,
+        .write_header_init                = toi_bio_write_header_init,
+        .write_header_cleanup                = toi_bio_write_header_cleanup,
+        .read_header_init                = toi_bio_read_header_init,
+        .read_header_cleanup                = toi_bio_read_header_cleanup,
+        .get_header_version                = toi_bio_get_header_version,
+        .remove_image                        = toi_bio_remove_image,
+        .parse_sig_location                = toi_bio_parse_sig_location,
+
+        .sysfs_data                        = sysfs_params,
+        .num_sysfs_entries                = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/**
+ * toi_block_io_load - load time routine for block I/O module
+ *
+ * Register block i/o ops and sysfs entries.
+ **/
+static __init int toi_block_io_load(void)
+{
+        return toi_register_module(&toi_blockwriter_ops);
+}
+
+late_initcall(toi_block_io_load);
diff -Nur linux-4.2/kernel/power/tuxonice_bio_internal.h linux-4.2-pck/kernel/power/tuxonice_bio_internal.h
--- linux-4.2/kernel/power/tuxonice_bio_internal.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_bio_internal.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,101 @@
+/*
+ * kernel/power/tuxonice_bio_internal.h
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+/* Extent chains */
+void toi_extent_state_goto_start(void);
+void toi_extent_state_save(int slot);
+int go_next_page(int writing, int section_barrier);
+void toi_extent_state_restore(int slot);
+void free_all_bdev_info(void);
+int devices_of_same_priority(struct toi_bdev_info *this);
+int toi_register_storage_chain(struct toi_bdev_info *new);
+int toi_serialise_extent_chains(void);
+int toi_load_extent_chains(void);
+int toi_bio_rw_page(int writing, struct page *page, int is_readahead,
+                int free_group);
+int toi_bio_restore_original_signature(void);
+int toi_bio_devinfo_storage_needed(void);
+unsigned long get_headerblock(void);
+dev_t get_header_dev_t(void);
+struct block_device *get_header_bdev(void);
+int toi_bio_allocate_storage(unsigned long request);
+void toi_bio_free_unused_storage(void);
+
+/* Signature functions */
+#define HaveImage "HaveImage"
+#define NoImage "TuxOnIce"
+#define sig_size (sizeof(HaveImage))
+
+struct sig_data {
+        char sig[sig_size];
+        int have_image;
+        int resumed_before;
+
+        char have_uuid;
+        char header_uuid[17];
+        dev_t header_dev_t;
+        unsigned long first_header_block;
+
+        /* Repeat the signature to be sure we have a header version */
+        char sig2[sig_size];
+        int header_version;
+};
+
+void forget_signature_page(void);
+int toi_check_for_signature(void);
+int toi_bio_image_exists(int quiet);
+int get_signature_page(void);
+int toi_bio_mark_resume_attempted(int);
+extern char *toi_cur_sig_page;
+extern char *toi_orig_sig_page;
+int toi_bio_mark_have_image(void);
+extern struct sig_data *toi_sig_data;
+extern dev_t resume_dev_t;
+extern struct block_device *resume_block_device;
+extern struct block_device *header_block_device;
+extern unsigned long resume_firstblock;
+
+struct block_device *open_bdev(dev_t device, int display_errs);
+extern int current_stream;
+extern int more_readahead;
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+        struct page *page, int is_readahead, int syncio, int free_group);
+int get_main_pool_phys_params(void);
+
+void toi_close_bdev(struct block_device *bdev);
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+                int display_errs);
+
+extern struct toi_module_ops toi_blockwriter_ops;
+void dump_block_chains(void);
+void debug_broken_header(void);
+extern unsigned long raw_pages_allocd, header_pages_reserved;
+int toi_bio_chains_debug_info(char *buffer, int size);
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd);
+int toi_bio_scan_for_image(int quiet);
+int toi_bio_get_header_version(void);
+
+void close_resume_dev_t(int force);
+int open_resume_dev_t(int force, int quiet);
+
+struct toi_incremental_image_pointer_saved_data {
+    unsigned long block;
+    int chain;
+};
+
+struct toi_incremental_image_pointer {
+    struct toi_incremental_image_pointer_saved_data save;
+    struct block_device *bdev;
+    unsigned long block;
+};
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
diff -Nur linux-4.2/kernel/power/tuxonice_bio_signature.c linux-4.2-pck/kernel/power/tuxonice_bio_signature.c
--- linux-4.2/kernel/power/tuxonice_bio_signature.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_bio_signature.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,403 @@
+/*
+ * kernel/power/tuxonice_bio_signature.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+struct sig_data *toi_sig_data;
+
+/* Structures overlaid on the swap header page */
+
+struct old_sig_data {
+        dev_t device;
+        unsigned long sector;
+        int resume_attempted;
+        int orig_sig_type;
+};
+
+union diskpage {
+        union swap_header swh;        /* swh.magic is the only member used */
+        struct sig_data sig_data;
+        struct old_sig_data old_sig_data;
+};
+
+union p_diskpage {
+        union diskpage *pointer;
+        char *ptr;
+        unsigned long address;
+};
+
+char *toi_cur_sig_page;
+char *toi_orig_sig_page;
+int have_image;
+int have_old_image;
+
+int get_signature_page(void)
+{
+        if (!toi_cur_sig_page) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0,
+                                "Allocating current signature page.");
+                toi_cur_sig_page = (char *) toi_get_zeroed_page(38,
+                        TOI_ATOMIC_GFP);
+                if (!toi_cur_sig_page) {
+                        printk(KERN_ERR "Failed to allocate memory for the "
+                                "current image signature.\n");
+                        return -ENOMEM;
+                }
+
+                toi_sig_data = (struct sig_data *) toi_cur_sig_page;
+        }
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx,"
+                        " sector %d.",
+                        resume_block_device->bd_dev, resume_firstblock);
+
+        return toi_bio_ops.bdev_page_io(READ, resume_block_device,
+                resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+void forget_signature_page(void)
+{
+        if (toi_cur_sig_page) {
+                toi_sig_data = NULL;
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page"
+                                " (%p).", toi_cur_sig_page);
+                toi_free_page(38, (unsigned long) toi_cur_sig_page);
+                toi_cur_sig_page = NULL;
+        }
+
+        if (toi_orig_sig_page) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page"
+                                " (%p).", toi_orig_sig_page);
+                toi_free_page(38, (unsigned long) toi_orig_sig_page);
+                toi_orig_sig_page = NULL;
+        }
+}
+
+/*
+ * We need to ensure we use the signature page that's currently on disk,
+ * so as not to remove the image header. Post-atomic-restore, the orig sig
+ * page will be empty, so we can use that as our method of knowing that we
+ * need to load the on-disk signature and not use the non-image sig in
+ * memory. (We're going to power down after writing the change, so it's safe.)
+ */
+int toi_bio_mark_resume_attempted(int flag)
+{
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.",
+                        flag);
+        if (!toi_orig_sig_page) {
+                forget_signature_page();
+                get_signature_page();
+        }
+        toi_sig_data->resumed_before = flag;
+        return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+                resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+int toi_bio_mark_have_image(void)
+{
+        int result = 0;
+        char buf[32];
+        struct fs_info *fs_info;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists.");
+        memcpy(toi_sig_data->sig, tuxonice_signature,
+                        sizeof(tuxonice_signature));
+        toi_sig_data->have_image = 1;
+        toi_sig_data->resumed_before = 0;
+        toi_sig_data->header_dev_t = get_header_dev_t();
+        toi_sig_data->have_uuid = 0;
+
+        fs_info = fs_info_from_block_dev(get_header_bdev());
+        if (fs_info && !IS_ERR(fs_info)) {
+                memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16);
+                free_fs_info(fs_info);
+        } else
+                result = (int) PTR_ERR(fs_info);
+
+        if (!result) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.",
+                                format_dev_t(buf, get_header_dev_t()));
+                toi_sig_data->have_uuid = 1;
+        } else
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for "
+                                "dev_t %s.",
+                                format_dev_t(buf, get_header_dev_t()));
+
+        toi_sig_data->first_header_block = get_headerblock();
+        have_image = 1;
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block "
+                        "is %d.", toi_sig_data->header_dev_t,
+                        toi_sig_data->first_header_block);
+
+        memcpy(toi_sig_data->sig2, tuxonice_signature,
+                        sizeof(tuxonice_signature));
+        toi_sig_data->header_version = TOI_HEADER_VERSION;
+
+        return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+                resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+int remove_old_signature(void)
+{
+        union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page;
+        char *orig_sig;
+        char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+        int result;
+        struct block_device *header_bdev;
+        struct old_sig_data *old_sig_data =
+                &swap_header_page.pointer->old_sig_data;
+
+        header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1);
+        result = toi_bio_ops.bdev_page_io(READ, header_bdev,
+                        old_sig_data->sector, virt_to_page(header_start));
+
+        if (result)
+                goto out;
+
+        /*
+         * TODO: Get the original contents of the first bytes of the swap
+         * header page.
+         */
+        if (!old_sig_data->orig_sig_type)
+                orig_sig = "SWAP-SPACE";
+        else
+                orig_sig = "SWAPSPACE2";
+
+        memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
+        memcpy(swap_header_page.ptr, header_start, 10);
+
+        result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+                resume_firstblock, virt_to_page(swap_header_page.ptr));
+
+out:
+        toi_close_bdev(header_bdev);
+        have_old_image = 0;
+        toi_free_page(38, (unsigned long) header_start);
+        return result;
+}
+
+/*
+ * toi_bio_restore_original_signature - restore the original signature
+ *
+ * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used.
+ * It will have the original signature page contents, stored in the image
+ * header. Post atomic-restore, we use toi_cur_sig_page, which will contain
+ * the contents that were loaded when we started the cycle.
+ */
+int toi_bio_restore_original_signature(void)
+{
+        char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page;
+
+        if (have_old_image)
+                return remove_old_signature();
+
+        if (!use) {
+                printk("toi_bio_restore_original_signature: No signature "
+                                "page loaded.\n");
+                return 0;
+        }
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists.");
+        have_image = 0;
+        toi_sig_data->have_image = 0;
+        return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+                resume_firstblock, virt_to_page(use));
+}
+
+/*
+ * check_for_signature - See whether we have an image.
+ *
+ * Returns 0 if no image, 1 if there is one, -1 if indeterminate.
+ */
+int toi_check_for_signature(void)
+{
+        union p_diskpage swap_header_page;
+        int type;
+        const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" };
+        const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" };
+        char *swap_header;
+
+        if (!toi_cur_sig_page) {
+                int result = get_signature_page();
+
+                if (result)
+                        return result;
+        }
+
+        /*
+         * Start by looking for the binary header.
+         */
+        if (!memcmp(tuxonice_signature, toi_cur_sig_page,
+                                sizeof(tuxonice_signature))) {
+                have_image = toi_sig_data->have_image;
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. "
+                                "Have image is %d.", have_image);
+                if (have_image)
+                        toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is "
+                                        "%x. First block is %d.",
+                                        toi_sig_data->header_dev_t,
+                                        toi_sig_data->first_header_block);
+                return toi_sig_data->have_image;
+        }
+
+        /*
+         * Failing that, try old file allocator headers.
+         */
+
+        if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) {
+                have_image = 1;
+                return 1;
+        }
+
+        have_image = 0;
+
+        if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage)))
+                return 0;
+
+        /*
+         * Nope? How about swap?
+         */
+        swap_header_page = (union p_diskpage) toi_cur_sig_page;
+        swap_header = swap_header_page.pointer->swh.magic.magic;
+
+        /* Normal swapspace? */
+        for (type = 0; type < 2; type++)
+                if (!memcmp(normal_sigs[type], swap_header,
+                                        strlen(normal_sigs[type])))
+                        return 0;
+
+        /* Swsusp or uswsusp? */
+        for (type = 0; type < 3; type++)
+                if (!memcmp(swsusp_sigs[type], swap_header,
+                                        strlen(swsusp_sigs[type])))
+                        return 2;
+
+        /* Old TuxOnIce version? */
+        if (!memcmp(tuxonice_signature, swap_header,
+                                sizeof(tuxonice_signature) - 1)) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce "
+                                "signature.");
+                have_old_image = 1;
+                return 3;
+        }
+
+        return -1;
+}
+
+/*
+ * toi_bio_image_exists
+ *
+ * Returns -1 if we don't know, 0 (no image), 1 (TuxOnIce image),
+ * 2 (swsusp/uswsusp image) or 3 (old TuxOnIce signature found).
+ */
+int toi_bio_image_exists(int quiet)
+{
+        int result;
+        char *msg = NULL;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists.");
+
+        if (!resume_dev_t) {
+                if (!quiet)
+                        printk(KERN_INFO "Not even trying to read header "
+                                "because resume_dev_t is not set.\n");
+                return -1;
+        }
+
+        if (open_resume_dev_t(0, quiet))
+                return -1;
+
+        result = toi_check_for_signature();
+
+        clear_toi_state(TOI_RESUMED_BEFORE);
+        if (toi_sig_data->resumed_before)
+                set_toi_state(TOI_RESUMED_BEFORE);
+
+        if (quiet || result == -ENOMEM)
+                return result;
+
+        if (result == -1)
+                msg = "TuxOnIce: Unable to find a signature."
+                                " Could you have moved a swap file?\n";
+        else if (!result)
+                msg = "TuxOnIce: No image found.\n";
+        else if (result == 1)
+                msg = "TuxOnIce: Image found.\n";
+        else if (result == 2)
+                msg = "TuxOnIce: uswsusp or swsusp image found.\n";
+        else if (result == 3)
+                msg = "TuxOnIce: Old implementation's signature found.\n";
+
+        printk(KERN_INFO "%s", msg);
+
+        return result;
+}
+
+int toi_bio_scan_for_image(int quiet)
+{
+        struct block_device *bdev;
+        char default_name[255] = "";
+
+        if (!quiet)
+                printk(KERN_DEBUG "Scanning swap devices for TuxOnIce "
+                                "signature...\n");
+        for (bdev = next_bdev_of_type(NULL, "swap"); bdev;
+                                bdev = next_bdev_of_type(bdev, "swap")) {
+                int result;
+                char name[255] = "";
+                sprintf(name, "%u:%u", MAJOR(bdev->bd_dev),
+                                MINOR(bdev->bd_dev));
+                if (!quiet)
+                        printk(KERN_DEBUG "- Trying %s.\n", name);
+                resume_block_device = bdev;
+                resume_dev_t = bdev->bd_dev;
+
+                result = toi_check_for_signature();
+
+                resume_block_device = NULL;
+                resume_dev_t = MKDEV(0, 0);
+
+                if (!default_name[0])
+                        strcpy(default_name, name);
+
+                if (result == 1) {
+                        /* Got one! */
+                        strcpy(resume_file, name);
+                        next_bdev_of_type(bdev, NULL);
+                        if (!quiet)
+                                printk(KERN_DEBUG " ==> Image found on %s.\n",
+                                                resume_file);
+                        return 1;
+                }
+                forget_signature_page();
+        }
+
+        if (!quiet)
+                printk(KERN_DEBUG "TuxOnIce scan: No image found.\n");
+        strcpy(resume_file, default_name);
+        return 0;
+}
+
+int toi_bio_get_header_version(void)
+{
+        return (memcmp(toi_sig_data->sig2, tuxonice_signature,
+                                sizeof(tuxonice_signature))) ?
+                0 : toi_sig_data->header_version;
+
+}
diff -Nur linux-4.2/kernel/power/tuxonice_builtin.c linux-4.2-pck/kernel/power/tuxonice_builtin.c
--- linux-4.2/kernel/power/tuxonice_builtin.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_builtin.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/kernel.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/bio.h>
+#include <linux/root_dev.h>
+#include <linux/freezer.h>
+#include <linux/reboot.h>
+#include <linux/writeback.h>
+#include <linux/tty.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/ctype.h>
+#include <linux/kthread.h>
+#include "tuxonice_io.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_alloc.h"
+
+unsigned long toi_bootflags_mask;
+
+/*
+ * Highmem related functions (x86 only).
+ */
+
+#ifdef CONFIG_HIGHMEM
+
+/**
+ * copyback_high: Restore highmem pages.
+ *
+ * Highmem data and pbe lists are/can be stored in highmem.
+ * The format is slightly different from the lowmem pbe lists
+ * used for the assembly code: the last pbe in each page is
+ * a struct page * instead of struct pbe *, pointing to the
+ * next page where pbes are stored (or NULL if it happens to be
+ * the end of the list). Since we don't want to generate
+ * unnecessary deltas against swsusp code, we use a cast
+ * instead of a union.
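+ *
+ * Layout of one page of pbes (illustrative sketch):
+ *   [ pbe 0 | pbe 1 | ... | last pbe: .next = struct page * of next pbe page ]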
+ **/
+
+static void copyback_high(void)
+{
+        struct page *pbe_page = (struct page *) restore_highmem_pblist;
+        struct pbe *this_pbe, *first_pbe;
+        unsigned long *origpage, *copypage;
+        int pbe_index = 1;
+
+        if (!pbe_page)
+                return;
+
+        this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+        first_pbe = this_pbe;
+
+        while (this_pbe) {
+                int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
+
+                origpage = kmap_atomic(pfn_to_page((unsigned long) this_pbe->orig_address));
+                copypage = kmap_atomic((struct page *) this_pbe->address);
+
+                while (loop >= 0) {
+                        *(origpage + loop) = *(copypage + loop);
+                        loop--;
+                }
+
+                kunmap_atomic(origpage);
+                kunmap_atomic(copypage);
+
+                if (!this_pbe->next)
+                        break;
+
+                if (pbe_index < PBES_PER_PAGE) {
+                        this_pbe++;
+                        pbe_index++;
+                } else {
+                        pbe_page = (struct page *) this_pbe->next;
+                        kunmap_atomic(first_pbe);
+                        if (!pbe_page)
+                                return;
+                        this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+                        first_pbe = this_pbe;
+                        pbe_index = 1;
+                }
+        }
+        kunmap_atomic(first_pbe);
+}
+
+#else /* CONFIG_HIGHMEM */
+static void copyback_high(void) { }
+#endif
+
+char toi_wait_for_keypress_dev_console(int timeout)
+{
+        int fd, this_timeout = 255, orig_kthread = 0;
+        char key = '\0';
+        struct termios t, t_backup;
+
+        /* We should be guaranteed /dev/console exists after populate_rootfs()
+         * in init/main.c.
+         */
+        fd = sys_open("/dev/console", O_RDONLY, 0);
+        if (fd < 0) {
+                printk(KERN_INFO "Couldn't open /dev/console.\n");
+                return key;
+        }
+
+        if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
+                goto out_close;
+
+        memcpy(&t_backup, &t, sizeof(t));
+
+        t.c_lflag &= ~(ISIG|ICANON|ECHO);
+        t.c_cc[VMIN] = 0;
+
+new_timeout:
+        if (timeout > 0) {
+                this_timeout = timeout < 26 ? timeout : 25;
+                timeout -= this_timeout;
+                this_timeout *= 10;
+        }
+
+        t.c_cc[VTIME] = this_timeout;
+
+        if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
+                goto out_restore;
+
+        if (current->flags & PF_KTHREAD) {
+            orig_kthread = (current->flags & PF_KTHREAD);
+            current->flags &= ~PF_KTHREAD;
+        }
+
+        while (1) {
+                if (sys_read(fd, &key, 1) <= 0) {
+                        if (timeout)
+                                goto new_timeout;
+                        key = '\0';
+                        break;
+                }
+                key = tolower(key);
+                if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
+                        if (key == 'c') {
+                                set_toi_state(TOI_CONTINUE_REQ);
+                                break;
+                        } else if (key == ' ')
+                                break;
+                } else
+                        break;
+        }
+        if (orig_kthread) {
+            current->flags |= PF_KTHREAD;
+        }
+
+out_restore:
+        sys_ioctl(fd, TCSETS, (long)&t_backup);
+out_close:
+        sys_close(fd);
+
+        return key;
+}
+
+struct toi_boot_kernel_data toi_bkd __nosavedata
+                __attribute__((aligned(PAGE_SIZE))) = {
+        MY_BOOT_KERNEL_DATA_VERSION,
+        0,
+#ifdef CONFIG_TOI_REPLACE_SWSUSP
+        (1 << TOI_REPLACE_SWSUSP) |
+#endif
+        (1 << TOI_NO_FLUSHER_THREAD) |
+        (1 << TOI_PAGESET2_FULL),
+};
+
+struct block_device *toi_open_by_devnum(dev_t dev)
+{
+        struct block_device *bdev = bdget(dev);
+        int err = -ENOMEM;
+        if (bdev)
+                err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
+        return err ? ERR_PTR(err) : bdev;
+}
+
+/**
+ * toi_close_bdev: Close a swap bdev.
+ *
+ * bdev: The block device to close.
+ */
+void toi_close_bdev(struct block_device *bdev)
+{
+        blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+}
+
+int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
+struct toi_core_fns *toi_core_fns;
+unsigned long toi_result;
+struct pagedir pagedir1 = {1};
+struct toi_cbw **toi_first_cbw;
+int toi_next_cbw;
+
+unsigned long toi_get_nonconflicting_page(void)
+{
+        return toi_core_fns->get_nonconflicting_page();
+}
+
+int toi_post_context_save(void)
+{
+        return toi_core_fns->post_context_save();
+}
+
+int try_tuxonice_hibernate(void)
+{
+        if (!toi_core_fns)
+                return -ENODEV;
+
+        return toi_core_fns->try_hibernate();
+}
+
+static int num_resume_calls;
+#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
+static int ignore_late_initcall = 1;
+#else
+static int ignore_late_initcall;
+#endif
+
+int toi_translate_err_default = TOI_CONTINUE_REQ;
+
+void try_tuxonice_resume(void)
+{
+        if (!hibernation_available())
+                return;
+
+        /* Cap the counter so it can't eventually wrap around */
+        if (num_resume_calls < 2)
+                num_resume_calls++;
+
+        if (num_resume_calls == 1 && ignore_late_initcall) {
+                printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
+                return;
+        }
+
+        if (toi_core_fns)
+                toi_core_fns->try_resume();
+        else
+                printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
+}
+
+int toi_lowlevel_builtin(void)
+{
+        int error = 0;
+
+        save_processor_state();
+        error = swsusp_arch_suspend();
+        if (error)
+                printk(KERN_ERR "Error %d hibernating\n", error);
+
+        /* Restore control flow appears here */
+        if (!toi_in_hibernate) {
+                copyback_high();
+                set_toi_state(TOI_NOW_RESUMING);
+        }
+
+        restore_processor_state();
+        return error;
+}
+
+unsigned long toi_compress_bytes_in;
+unsigned long toi_compress_bytes_out;
+
+int toi_in_suspend(void)
+{
+  return in_suspend;
+}
+
+unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
+                (1 << TOI_IGNORE_LOGLEVEL) |
+                (1 << TOI_IO_STOPPED));
+
+/* The number of hibernates we have started (some may have been cancelled) */
+unsigned int nr_hibernates;
+int toi_running;
+__nosavedata int toi_in_hibernate;
+__nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_trace_allocs;
+
+void toi_read_lock_tasklist(void)
+{
+        read_lock(&tasklist_lock);
+}
+
+void toi_read_unlock_tasklist(void)
+{
+        read_unlock(&tasklist_lock);
+}
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+int (*toi_flag_zram_disks) (void);
+
+int toi_do_flag_zram_disks(void)
+{
+        return toi_flag_zram_disks ? (*toi_flag_zram_disks)() : 0;
+}
+
+#endif
+
+/* toi_generate_free_page_map
+ *
+ * Description:        This routine generates a bitmap of free pages from the
+ *                 lists used by the memory manager. We then use the bitmap
+ *                 to quickly calculate which pages to save and in which
+ *                 pagesets.
+ */
+void toi_generate_free_page_map(void)
+{
+        int order, cpu, t;
+        unsigned long flags, i;
+        struct zone *zone;
+        struct list_head *curr;
+        unsigned long pfn;
+        struct page *page;
+
+        for_each_populated_zone(zone) {
+
+                if (!zone->spanned_pages)
+                        continue;
+
+                spin_lock_irqsave(&zone->lock, flags);
+
+                for (i = 0; i < zone->spanned_pages; i++) {
+                        pfn = zone->zone_start_pfn + i;
+
+                        if (!pfn_valid(pfn))
+                                continue;
+
+                        page = pfn_to_page(pfn);
+
+                        ClearPageNosaveFree(page);
+                }
+
+                for_each_migratetype_order(order, t) {
+                        list_for_each(curr,
+                                        &zone->free_area[order].free_list[t]) {
+                                unsigned long j;
+
+                                pfn = page_to_pfn(list_entry(curr, struct page,
+                                                        lru));
+                                for (j = 0; j < (1UL << order); j++)
+                                        SetPageNosaveFree(pfn_to_page(pfn + j));
+                        }
+                }
+
+                for_each_online_cpu(cpu) {
+                        struct per_cpu_pageset *pset =
+                                per_cpu_ptr(zone->pageset, cpu);
+                        struct per_cpu_pages *pcp = &pset->pcp;
+                        struct page *page;
+                        int t;
+
+                        for (t = 0; t < MIGRATE_PCPTYPES; t++)
+                                list_for_each_entry(page, &pcp->lists[t], lru)
+                                        SetPageNosaveFree(page);
+                }
+
+                spin_unlock_irqrestore(&zone->lock, flags);
+        }
+}
+
+/* toi_size_of_free_region
+ *
+ * Description:        Return the number of pages that are free, beginning with and
+ *                 including this one.
+ */
+int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn)
+{
+        unsigned long this_pfn = start_pfn,
+                      end_pfn = zone_end_pfn(zone);
+
+        while (pfn_valid(this_pfn) && this_pfn < end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
+                this_pfn++;
+
+        return this_pfn - start_pfn;
+}
+
+static int __init toi_wait_setup(char *str)
+{
+        int value;
+
+        if (sscanf(str, "=%d", &value)) {
+                if (value < -1 || value > 255)
+                        printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
+                                        "255.\n");
+                else
+                        toi_wait = value;
+        }
+
+        return 1;
+}
+__setup("toi_wait", toi_wait_setup);
+
+static int __init toi_translate_retry_setup(char *str)
+{
+        toi_translate_err_default = 0;
+        return 1;
+}
+__setup("toi_translate_retry", toi_translate_retry_setup);
+
+static int __init toi_debug_setup(char *str)
+{
+        toi_bkd.toi_action |= (1 << TOI_LOGALL);
+        toi_bootflags_mask |= (1 << TOI_LOGALL);
+        toi_bkd.toi_debug_state = 255;
+        toi_bkd.toi_default_console_level = 7;
+        return 1;
+}
+__setup("toi_debug_setup", toi_debug_setup);
+
+static int __init toi_pause_setup(char *str)
+{
+        toi_bkd.toi_action |= (1 << TOI_PAUSE);
+        toi_bootflags_mask |= (1 << TOI_PAUSE);
+        return 1;
+}
+__setup("toi_pause", toi_pause_setup);
+
+#ifdef CONFIG_PM_DEBUG
+static int __init toi_trace_allocs_setup(char *str)
+{
+        int value;
+
+        if (sscanf(str, "=%d", &value))
+                toi_trace_allocs = value;
+
+        return 1;
+}
+__setup("toi_trace_allocs", toi_trace_allocs_setup);
+#endif
+
+static int __init toi_ignore_late_initcall_setup(char *str)
+{
+        int value;
+
+        if (sscanf(str, "=%d", &value))
+                ignore_late_initcall = value;
+
+        return 1;
+}
+__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
+
+static int __init toi_force_no_multithreaded_setup(char *str)
+{
+        int value;
+
+        toi_bkd.toi_action &= ~(1 << TOI_NO_MULTITHREADED_IO);
+        toi_bootflags_mask |= (1 << TOI_NO_MULTITHREADED_IO);
+
+        if (sscanf(str, "=%d", &value) && value)
+                toi_bkd.toi_action |= (1 << TOI_NO_MULTITHREADED_IO);
+
+        return 1;
+}
+__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup);
+
+#ifdef CONFIG_KGDB
+static int __init toi_post_resume_breakpoint_setup(char *str)
+{
+        int value;
+
+        toi_bkd.toi_action &= ~(1 << TOI_POST_RESUME_BREAKPOINT);
+        toi_bootflags_mask |= (1 << TOI_POST_RESUME_BREAKPOINT);
+        if (sscanf(str, "=%d", &value) && value)
+                toi_bkd.toi_action |= (1 << TOI_POST_RESUME_BREAKPOINT);
+
+        return 1;
+}
+__setup("toi_post_resume_break", toi_post_resume_breakpoint_setup);
+#endif
+
+static int __init toi_disable_readahead_setup(char *str)
+{
+        int value;
+
+        toi_bkd.toi_action &= ~(1 << TOI_NO_READAHEAD);
+        toi_bootflags_mask |= (1 << TOI_NO_READAHEAD);
+        if (sscanf(str, "=%d", &value) && value)
+                toi_bkd.toi_action |= (1 << TOI_NO_READAHEAD);
+
+        return 1;
+}
+__setup("toi_no_readahead", toi_disable_readahead_setup);
diff -Nur linux-4.2/kernel/power/tuxonice_builtin.h linux-4.2-pck/kernel/power/tuxonice_builtin.h
--- linux-4.2/kernel/power/tuxonice_builtin.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_builtin.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <asm/setup.h>
+
+extern struct toi_core_fns *toi_core_fns;
+extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
+extern unsigned int nr_hibernates;
+extern int toi_in_hibernate;
+
+extern __nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_lowlevel_builtin(void);
+
+#ifdef CONFIG_HIGHMEM
+extern __nosavedata struct zone_data *toi_nosave_zone_list;
+extern __nosavedata unsigned long toi_nosave_max_pfn;
+#endif
+
+extern unsigned long toi_get_nonconflicting_page(void);
+extern int toi_post_context_save(void);
+
+extern char toi_wait_for_keypress_dev_console(int timeout);
+extern struct block_device *toi_open_by_devnum(dev_t dev);
+extern void toi_close_bdev(struct block_device *bdev);
+extern int toi_wait;
+extern int toi_translate_err_default;
+extern int toi_force_no_multithreaded;
+extern void toi_read_lock_tasklist(void);
+extern void toi_read_unlock_tasklist(void);
+extern int toi_in_suspend(void);
+extern void toi_generate_free_page_map(void);
+extern int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn);
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+extern int toi_do_flag_zram_disks(void);
+#else
+#define toi_do_flag_zram_disks() (0)
+#endif
diff -Nur linux-4.2/kernel/power/tuxonice_checksum.c linux-4.2-pck/kernel/power/tuxonice_checksum.c
--- linux-4.2/kernel/power/tuxonice_checksum.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_checksum.c	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,392 @@
+/*
+ * kernel/power/tuxonice_checksum.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+
+static struct toi_module_ops toi_checksum_ops;
+
+/* Constant for now, but I might allow tuning later */
+static char toi_checksum_name[32] = "md4";
+/* Bytes per checksum */
+#define CHECKSUM_SIZE (16)
+
+#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
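+/* e.g. with 4 KiB pages and 8-byte pointers: (4096 - 8) / 16 = 255 checksums per page */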
+
+struct cpu_context {
+        struct crypto_hash *transform;
+        struct hash_desc desc;
+        struct scatterlist sg[2];
+        char *buf;
+};
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+static int pages_allocated;
+static unsigned long page_list;
+
+static int toi_num_resaved;
+
+static unsigned long this_checksum, next_page;
+static int checksum_count;
+
+static inline int checksum_pages_needed(void)
+{
+        return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
+}
+
+/* ---- Local buffer management ---- */
+
+/*
+ * toi_checksum_cleanup
+ *
+ * Frees memory allocated for our labours.
+ */
+static void toi_checksum_cleanup(int ending_cycle)
+{
+        int cpu;
+
+        if (ending_cycle) {
+                for_each_online_cpu(cpu) {
+                        struct cpu_context *this = &per_cpu(contexts, cpu);
+                        if (this->transform) {
+                                crypto_free_hash(this->transform);
+                                this->transform = NULL;
+                                this->desc.tfm = NULL;
+                        }
+
+                        if (this->buf) {
+                                toi_free_page(27, (unsigned long) this->buf);
+                                this->buf = NULL;
+                        }
+                }
+        }
+}
+
+/*
+ * toi_checksum_initialise
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ * Returns: Int: Zero on success, one if the transform or buffers
+ * couldn't be set up. Even if we can't set up checksumming, we still
+ * seek to hibernate.
+ */
+static int toi_checksum_initialise(int starting_cycle)
+{
+        int cpu;
+
+        if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
+                return 0;
+
+        if (!*toi_checksum_name) {
+                printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
+                return 1;
+        }
+
+        for_each_online_cpu(cpu) {
+                struct cpu_context *this = &per_cpu(contexts, cpu);
+                struct page *page;
+
+                this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
+                if (IS_ERR(this->transform)) {
+                        printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+                                "%s checksum algorithm: %ld.\n",
+                                toi_checksum_name, (long) this->transform);
+                        this->transform = NULL;
+                        return 1;
+                }
+
+                this->desc.tfm = this->transform;
+                this->desc.flags = 0;
+
+                page = toi_alloc_page(27, GFP_KERNEL);
+                if (!page)
+                        return 1;
+                this->buf = page_address(page);
+                sg_init_one(&this->sg[0], this->buf, PAGE_SIZE);
+        }
+        return 0;
+}
+
+/*
+ * toi_checksum_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_checksum_print_debug_stats(char *buffer, int size)
+{
+        int len;
+
+        if (!toi_checksum_ops.enabled)
+                return scnprintf(buffer, size,
+                        "- Checksumming disabled.\n");
+
+        len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
+                        toi_checksum_name);
+        len += scnprintf(buffer + len, size - len,
+                "  %d pages resaved in atomic copy.\n", toi_num_resaved);
+        return len;
+}
+
+static int toi_checksum_memory_needed(void)
+{
+        return toi_checksum_ops.enabled ?
+                checksum_pages_needed() << PAGE_SHIFT : 0;
+}
+
+static int toi_checksum_storage_needed(void)
+{
+        if (toi_checksum_ops.enabled)
+                return strlen(toi_checksum_name) + sizeof(int) + 1;
+        else
+                return 0;
+}
+
+/*
+ * toi_checksum_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_checksum_save_config_info(char *buffer)
+{
+        int namelen = strlen(toi_checksum_name) + 1;
+        int total_len;
+
+        *((unsigned int *) buffer) = namelen;
+        strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
+        total_len = sizeof(unsigned int) + namelen;
+        return total_len;
+}
+
+/* toi_checksum_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for dechecksumming the image at
+ * resume time.
+ */
+static void toi_checksum_load_config_info(char *buffer, int size)
+{
+        int namelen;
+
+        namelen = *((unsigned int *) (buffer));
+        strncpy(toi_checksum_name, buffer + sizeof(unsigned int),
+                        namelen);
+        return;
+}
+
+/*
+ * Free Checksum Memory
+ */
+
+void free_checksum_pages(void)
+{
+        while (pages_allocated) {
+                unsigned long next = *((unsigned long *) page_list);
+                ClearPageNosave(virt_to_page(page_list));
+                toi_free_page(15, (unsigned long) page_list);
+                page_list = next;
+                pages_allocated--;
+        }
+}
+
+/*
+ * Allocate Checksum Memory
+ */
+
+int allocate_checksum_pages(void)
+{
+        int pages_needed = checksum_pages_needed();
+
+        if (!toi_checksum_ops.enabled)
+                return 0;
+
+        while (pages_allocated < pages_needed) {
+                unsigned long *new_page =
+                  (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
+                if (!new_page) {
+                        printk(KERN_ERR "Unable to allocate checksum pages.\n");
+                        return -ENOMEM;
+                }
+                SetPageNosave(virt_to_page(new_page));
+                (*new_page) = page_list;
+                page_list = (unsigned long) new_page;
+                pages_allocated++;
+        }
+
+        next_page = (unsigned long) page_list;
+        checksum_count = 0;
+
+        return 0;
+}
+
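+/*
+ * Checksum pages form a singly-linked list: the first sizeof(void *) bytes
+ * of each page hold the address of the next page, and the remainder holds
+ * CHECKSUMS_PER_PAGE checksums of CHECKSUM_SIZE bytes each.
+ */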
+char *tuxonice_get_next_checksum(void)
+{
+        if (!toi_checksum_ops.enabled)
+                return NULL;
+
+        if (checksum_count % CHECKSUMS_PER_PAGE)
+                this_checksum += CHECKSUM_SIZE;
+        else {
+                this_checksum = next_page + sizeof(void *);
+                next_page = *((unsigned long *) next_page);
+        }
+
+        checksum_count++;
+        return (char *) this_checksum;
+}
+
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+{
+        char *pa;
+        int result, cpu = smp_processor_id();
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+        if (!toi_checksum_ops.enabled)
+                return 0;
+
+        pa = kmap(page);
+        memcpy(ctx->buf, pa, PAGE_SIZE);
+        kunmap(page);
+        result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+                                                checksum_locn);
+        if (result)
+                printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest "
+                                "returned %d.\n", result);
+        return result;
+}
+
+/*
+ * Verify checksums
+ */
+
+void check_checksums(void)
+{
+        int index = 0, cpu = smp_processor_id();
+        char current_checksum[CHECKSUM_SIZE];
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+        unsigned long pfn;
+
+        if (!toi_checksum_ops.enabled) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksumming disabled.");
+                return;
+        }
+
+        next_page = (unsigned long) page_list;
+
+        toi_num_resaved = 0;
+        this_checksum = 0;
+
+        toi_trace_index++;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Verifying checksums.");
+        memory_bm_position_reset(pageset2_map);
+        for (pfn = memory_bm_next_pfn(pageset2_map, 0); pfn != BM_END_OF_MAP;
+                        pfn = memory_bm_next_pfn(pageset2_map, 0)) {
+                int ret, resave_needed = false;
+                char *pa;
+                struct page *page = pfn_to_page(pfn);
+
+                if (index < checksum_count) {
+                    if (index % CHECKSUMS_PER_PAGE) {
+                        this_checksum += CHECKSUM_SIZE;
+                    } else {
+                        this_checksum = next_page + sizeof(void *);
+                        next_page = *((unsigned long *) next_page);
+                    }
+
+                    /* Done when IRQs disabled so must be atomic */
+                    pa = kmap_atomic(page);
+                    memcpy(ctx->buf, pa, PAGE_SIZE);
+                    kunmap_atomic(pa);
+                    ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+                            current_checksum);
+
+                    if (ret) {
+                        printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
+                        return;
+                    }
+
+                    resave_needed = memcmp(current_checksum, (char *) this_checksum,
+                            CHECKSUM_SIZE);
+                } else {
+                    resave_needed = true;
+                }
+
+                if (resave_needed) {
+                        TOI_TRACE_DEBUG(pfn, "_Resaving %d", resave_needed);
+                        SetPageResave(pfn_to_page(pfn));
+                        toi_num_resaved++;
+                        if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
+                                set_abort_result(TOI_RESAVE_NEEDED);
+                }
+
+                index++;
+        }
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksum verification complete.");
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
+                        NULL),
+        SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_ABORT_ON_RESAVE_NEEDED, 0)
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_checksum_ops = {
+        .type                        = MISC_MODULE,
+        .name                        = "checksumming",
+        .directory                = "checksum",
+        .module                        = THIS_MODULE,
+        .initialise                = toi_checksum_initialise,
+        .cleanup                = toi_checksum_cleanup,
+        .print_debug_info        = toi_checksum_print_debug_stats,
+        .save_config_info        = toi_checksum_save_config_info,
+        .load_config_info        = toi_checksum_load_config_info,
+        .memory_needed                = toi_checksum_memory_needed,
+        .storage_needed                = toi_checksum_storage_needed,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+int toi_checksum_init(void)
+{
+        int result = toi_register_module(&toi_checksum_ops);
+        return result;
+}
+
+void toi_checksum_exit(void)
+{
+        toi_unregister_module(&toi_checksum_ops);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_checksum.h linux-4.2-pck/kernel/power/tuxonice_checksum.h
--- linux-4.2/kernel/power/tuxonice_checksum.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_checksum.h	2015-09-08 11:20:32.463678805 -0300
@@ -0,0 +1,31 @@
+/*
+ * kernel/power/tuxonice_checksum.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#if defined(CONFIG_TOI_CHECKSUM)
+extern int toi_checksum_init(void);
+extern void toi_checksum_exit(void);
+void check_checksums(void);
+int allocate_checksum_pages(void);
+void free_checksum_pages(void);
+char *tuxonice_get_next_checksum(void);
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
+#else
+static inline int toi_checksum_init(void) { return 0; }
+static inline void toi_checksum_exit(void) { }
+static inline void check_checksums(void) { };
+static inline int allocate_checksum_pages(void) { return 0; };
+static inline void free_checksum_pages(void) { };
+static inline char *tuxonice_get_next_checksum(void) { return NULL; };
+static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+        { return 0; }
+#endif
+
diff -Nur linux-4.2/kernel/power/tuxonice_cluster.c linux-4.2-pck/kernel/power/tuxonice_cluster.c
--- linux-4.2/kernel/power/tuxonice_cluster.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_cluster.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,1058 @@
+/*
+ * kernel/power/tuxonice_cluster.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines for cluster hibernation support.
+ *
+ * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
+ *
+ * How does it work?
+ *
+ * There is no 'master' node that tells everyone else what to do. All nodes
+ * send messages to the broadcast address/port, maintain a list of peers
+ * and figure out when to progress to the next step in hibernating or resuming.
+ * This makes us more fault tolerant when it comes to nodes coming and going
+ * (which may be more of an issue if we're hibernating when power supplies
+ * are being unreliable).
+ *
+ * At boot time, we start a ktuxonice thread that handles communication with
+ * other nodes. This node maintains a state machine that controls our progress
+ * through hibernating and resuming, keeping us in step with other nodes. Nodes
+ * are identified by their hw address.
+ *
+ * On startup, the node sends CLUSTER_PING on the configured interface's
+ * broadcast address, port $toi_cluster_port (see below) and begins to listen
+ * for other broadcast messages. CLUSTER_PING messages are repeated at
+ * intervals of 5 minutes, with a random offset to spread traffic out.
+ *
+ * A hibernation cycle is initiated from any node via
+ *
+ * echo > /sys/power/tuxonice/do_hibernate
+ *
+ * and (possibly) the hibernate script. At each step of the process, the node
+ * completes its work, and waits for all other nodes to signal completion of
+ * their work (or timeout) before progressing to the next step.
+ *
+ * Request/state   Action before reply     Possible reply     Next state
+ * HIBERNATE       capable, pre-script     HIBERNATE|ACK      NODE_PREP
+ *                                         HIBERNATE|NACK     INIT_0
+ *
+ * PREP            prepare_image           PREP|ACK           IMAGE_WRITE
+ *                                         PREP|NACK          INIT_0
+ *                                         ABORT              RUNNING
+ *
+ * IO              write image             IO|ACK             power off
+ *                                         ABORT              POST_RESUME
+ *
+ * (Boot time)     check for image         IMAGE|ACK          RESUME_PREP
+ *                                         (Note 1)
+ *                                         IMAGE|NACK         (Note 2)
+ *
+ * PREP            prepare read image      PREP|ACK           IMAGE_READ
+ *                                         PREP|NACK          (As NACK_IMAGE)
+ *
+ * IO              read image              IO|ACK             POST_RESUME
+ *
+ * POST_RESUME     thaw, post-script                          RUNNING
+ *
+ * INIT_0          init 0
+ *
+ * Other messages:
+ *
+ * - PING: Request for all other live nodes to send a PONG. Used at startup to
+ *   announce presence, when a node is suspected dead and periodically, in case
+ *   segments of the network are [un]plugged.
+ *
+ * - PONG: Response to a PING.
+ *
+ * - ABORT: Request to cancel writing an image.
+ *
+ * - BYE: Notification that this node is shutting down.
+ *
+ * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
+ * nodes which are slower to start up can get state synchronised. If a node
+ * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
+ * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
+ * must invalidate its image (if any) and boot normally.
+ *
+ * Note 2: May occur when one node lost power or powered off while others
+ * hibernated. This node waits for others to complete resuming (ACK_READ)
+ * before completing its boot, so that it appears as a failed node restarting.
+ *
+ * If any node has an image, then it also has a list of nodes that hibernated
+ * in synchronisation with it. The node will wait for other nodes to appear
+ * or timeout before beginning its restoration.
+ *
+ * If a node has no image, it needs to wait, in case other nodes which do have
+ * an image are going to resume, but are taking longer to announce their
+ * presence. For this reason, the user can specify a timeout value and a number
+ * of nodes detected before we just continue. (We might want to assume that in
+ * a cluster of, say, 15 nodes, if 8 others have booted without finding an
+ * image, the remaining nodes will too. This might help in situations where
+ * some nodes are much slower to boot, or more prone to hardware failure.)
+ */
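+
+/*
+ * Illustrative exchange (a sketch of the table above, not a trace of the
+ * current implementation): the initiating node broadcasts HIBERNATE; each
+ * capable peer runs its pre-script and replies HIBERNATE|ACK. Once all
+ * replies (or a timeout) are in, PREP is sent and acked while images are
+ * prepared, then IO is sent and acked while images are written, after which
+ * the nodes power off together.
+ */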
+
+#include <linux/suspend.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+
+#if 1
+#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
+#else
+#define PRINTK(a, b...) do { } while (0)
+#endif
+
+static int loopback_mode;
+static int num_local_nodes = 1;
+#define MAX_LOCAL_NODES 8
+#define SADDR (loopback_mode ? b->sid : h->saddr)
+
+#define MYNAME "TuxOnIce Clustering"
+
+enum cluster_message {
+        MSG_ACK = 1,
+        MSG_NACK = 2,
+        MSG_PING = 4,
+        MSG_ABORT = 8,
+        MSG_BYE = 16,
+        MSG_HIBERNATE = 32,
+        MSG_IMAGE = 64,
+        MSG_IO = 128,
+        MSG_RUNNING = 256
+};
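+
+/*
+ * A reply is formed by OR-ing MSG_ACK or MSG_NACK into the state value, so
+ * e.g. MSG_HIBERNATE | MSG_ACK == 33 ("Hibernate ack") and
+ * MSG_IMAGE | MSG_NACK == 66 ("No image here"); these are the literal values
+ * handled in str_message() below.
+ */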
+
+static char *str_message(int message)
+{
+        switch (message) {
+        case 4:
+                return "Ping";
+        case 8:
+                return "Abort";
+        case 9:
+                return "Abort acked";
+        case 10:
+                return "Abort nacked";
+        case 16:
+                return "Bye";
+        case 17:
+                return "Bye acked";
+        case 18:
+                return "Bye nacked";
+        case 32:
+                return "Hibernate request";
+        case 33:
+                return "Hibernate ack";
+        case 34:
+                return "Hibernate nack";
+        case 64:
+                return "Image exists?";
+        case 65:
+                return "Image does exist";
+        case 66:
+                return "No image here";
+        case 128:
+                return "I/O";
+        case 129:
+                return "I/O okay";
+        case 130:
+                return "I/O failed";
+        case 256:
+                return "Running";
+        default:
+                printk(KERN_ERR "Unrecognised message %d.\n", message);
+                return "Unrecognised message (see dmesg)";
+        }
+}
+
+#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
+#define MSG_STATE_MASK (~MSG_ACK_MASK)
+
+struct node_info {
+        struct list_head member_list;
+        wait_queue_head_t member_events;
+        spinlock_t member_list_lock;
+        spinlock_t receive_lock;
+        int peer_count, ignored_peer_count;
+        struct toi_sysfs_data sysfs_data;
+        enum cluster_message current_message;
+};
+
+struct node_info node_array[MAX_LOCAL_NODES];
+
+struct cluster_member {
+        __be32 addr;
+        enum cluster_message message;
+        struct list_head list;
+        int ignore;
+};
+
+#define toi_cluster_port_send 3501
+#define toi_cluster_port_recv 3502
+
+static struct net_device *net_dev;
+static struct toi_module_ops toi_cluster_ops;
+
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+                struct packet_type *pt, struct net_device *orig_dev);
+
+static struct packet_type toi_cluster_packet_type = {
+        .type =        __constant_htons(ETH_P_IP),
+        .func =        toi_recv,
+};
+
+struct toi_pkt {                /* BOOTP packet format */
+        struct iphdr iph;        /* IP header */
+        struct udphdr udph;        /* UDP header */
+        u8 htype;                /* HW address type */
+        u8 hlen;                /* HW address length */
+        __be32 xid;                /* Transaction ID */
+        __be16 secs;                /* Seconds since we started */
+        __be16 flags;                /* Just what it says */
+        u8 hw_addr[16];                /* Sender's HW address */
+        u16 message;                /* Message */
+        unsigned long sid;        /* Source ID for loopback testing */
+};
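+
+/*
+ * The layout mirrors a BOOTP packet (see the field comments above); the
+ * cluster payload proper is ->message plus ->sid, the latter being used only
+ * in loopback testing, where the SADDR macro substitutes it for the IP source
+ * address so the simulated local nodes can be told apart.
+ */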
+
+static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
+
+static int added_pack;
+
+static int others_have_image;
+
+/* Key used to allow multiple clusters on the same lan */
+static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
+static char pre_hibernate_script[255] =
+        CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
+static char post_hibernate_script[255] =
+        CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
+
+/*                        List of cluster members                        */
+static unsigned long continue_delay = 5 * HZ;
+static unsigned long cluster_message_timeout = 3 * HZ;
+
+/*                 === Membership list ===         */
+
+static void print_member_info(int index)
+{
+        struct cluster_member *this;
+
+        printk(KERN_INFO "==> Dumping node %d.\n", index);
+
+        list_for_each_entry(this, &node_array[index].member_list, list)
+                printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
+                                NIPQUAD(this->addr),
+                                str_message(this->message),
+                                this->ignore ? "(Ignored)" : "");
+        printk(KERN_INFO "== Done ==\n");
+}
+
+static struct cluster_member *__find_member(int index, __be32 addr)
+{
+        struct cluster_member *this;
+
+        list_for_each_entry(this, &node_array[index].member_list, list) {
+                if (this->addr != addr)
+                        continue;
+
+                return this;
+        }
+
+        return NULL;
+}
+
+static void set_ignore(int index, __be32 addr, struct cluster_member *this)
+{
+        if (this->ignore) {
+                PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
+                                index, NIPQUAD(addr));
+                return;
+        }
+
+        PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
+                                index, NIPQUAD(addr));
+        this->ignore = 1;
+        node_array[index].ignored_peer_count++;
+}
+
+static int __add_update_member(int index, __be32 addr, int message)
+{
+        struct cluster_member *this;
+
+        this = __find_member(index, addr);
+        if (this) {
+                if (this->message != message) {
+                        this->message = message;
+                        if ((message & MSG_NACK) &&
+                            (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+                                set_ignore(index, addr, this);
+                        PRINTK("Node %d sees node %d.%d.%d.%d now sending "
+                                        "%s.\n", index, NIPQUAD(addr),
+                                        str_message(message));
+                        wake_up(&node_array[index].member_events);
+                }
+                return 0;
+        }
+
+        this = (struct cluster_member *) toi_kzalloc(36,
+                        sizeof(struct cluster_member), GFP_KERNEL);
+
+        if (!this)
+                return -1;
+
+        this->addr = addr;
+        this->message = message;
+        this->ignore = 0;
+        INIT_LIST_HEAD(&this->list);
+
+        node_array[index].peer_count++;
+
+        PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
+                        NIPQUAD(addr), str_message(message));
+
+        if ((message & MSG_NACK) &&
+            (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+                set_ignore(index, addr, this);
+        list_add_tail(&this->list, &node_array[index].member_list);
+        return 1;
+}
+
+static int add_update_member(int index, __be32 addr, int message)
+{
+        int result;
+        unsigned long flags;
+        spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+        result = __add_update_member(index, addr, message);
+        spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+
+        print_member_info(index);
+
+        wake_up(&node_array[index].member_events);
+
+        return result;
+}
+
+static void del_member(int index, __be32 addr)
+{
+        struct cluster_member *this;
+        unsigned long flags;
+
+        spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+        this = __find_member(index, addr);
+
+        if (this) {
+                list_del_init(&this->list);
+                toi_kfree(36, this, sizeof(*this));
+                node_array[index].peer_count--;
+        }
+
+        spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+/*                 === Message transmission ===        */
+
+static void toi_send_if(int message, unsigned long my_id);
+
+/*
+ *  Process received TOI packet.
+ */
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+                struct packet_type *pt, struct net_device *orig_dev)
+{
+        struct toi_pkt *b;
+        struct iphdr *h;
+        int len, result, index;
+        unsigned long addr, message, ack;
+
+        /* Perform verifications before taking the lock.  */
+        if (skb->pkt_type == PACKET_OTHERHOST)
+                goto drop;
+
+        if (dev != net_dev)
+                goto drop;
+
+        skb = skb_share_check(skb, GFP_ATOMIC);
+        if (!skb)
+                return NET_RX_DROP;
+
+        if (!pskb_may_pull(skb,
+                           sizeof(struct iphdr) +
+                           sizeof(struct udphdr)))
+                goto drop;
+
+        b = (struct toi_pkt *)skb_network_header(skb);
+        h = &b->iph;
+
+        if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
+                goto drop;
+
+        /* Fragments are not supported */
+        if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
+                if (net_ratelimit())
+                        printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
+                               "cluster message.\n");
+                goto drop;
+        }
+
+        if (skb->len < ntohs(h->tot_len))
+                goto drop;
+
+        if (ip_fast_csum((char *) h, h->ihl))
+                goto drop;
+
+        if (b->udph.source != htons(toi_cluster_port_send) ||
+            b->udph.dest != htons(toi_cluster_port_recv))
+                goto drop;
+
+        if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+                goto drop;
+
+        len = ntohs(b->udph.len) - sizeof(struct udphdr);
+
+        /* Ok the front looks good, make sure we can get at the rest.  */
+        if (!pskb_may_pull(skb, skb->len))
+                goto drop;
+
+        b = (struct toi_pkt *)skb_network_header(skb);
+        h = &b->iph;
+
+        addr = SADDR;
+        PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
+                        str_message(b->message), NIPQUAD(addr));
+
+        message = b->message & MSG_STATE_MASK;
+        ack = b->message & MSG_ACK_MASK;
+
+        for (index = 0; index < num_local_nodes; index++) {
+                int new_message = node_array[index].current_message,
+                    old_message = new_message;
+
+                if (index == SADDR || !old_message) {
+                        PRINTK("Ignoring node %d (offline or self).\n", index);
+                        continue;
+                }
+
+                /* One message at a time, please. */
+                spin_lock(&node_array[index].receive_lock);
+
+                result = add_update_member(index, SADDR, b->message);
+                if (result == -1) {
+                        printk(KERN_INFO "Failed to add new cluster member "
+                                        NIPQUAD_FMT ".\n",
+                                        NIPQUAD(addr));
+                        goto drop_unlock;
+                }
+
+                switch (b->message & MSG_STATE_MASK) {
+                case MSG_PING:
+                        break;
+                case MSG_ABORT:
+                        break;
+                case MSG_BYE:
+                        break;
+                case MSG_HIBERNATE:
+                        /* Can I hibernate? */
+                        new_message = MSG_HIBERNATE |
+                                ((index & 1) ? MSG_NACK : MSG_ACK);
+                        break;
+                case MSG_IMAGE:
+                        /* Can I resume? */
+                        new_message = MSG_IMAGE |
+                                ((index & 1) ? MSG_NACK : MSG_ACK);
+                        if (new_message != old_message)
+                                printk(KERN_ERR "Setting whether I can resume "
+                                                "to %d.\n", new_message);
+                        break;
+                case MSG_IO:
+                        new_message = MSG_IO | MSG_ACK;
+                        break;
+                case MSG_RUNNING:
+                        break;
+                default:
+                        if (net_ratelimit())
+                                printk(KERN_ERR "Unrecognised TuxOnIce cluster"
+                                        " message %d from " NIPQUAD_FMT ".\n",
+                                        b->message, NIPQUAD(addr));
+                }
+
+                if (old_message != new_message) {
+                        node_array[index].current_message = new_message;
+                        printk(KERN_INFO ">>> Sending new message for node "
+                                        "%d.\n", index);
+                        toi_send_if(new_message, index);
+                } else if (!ack) {
+                        printk(KERN_INFO ">>> Resending message for node %d.\n",
+                                        index);
+                        toi_send_if(new_message, index);
+                }
+drop_unlock:
+                spin_unlock(&node_array[index].receive_lock);
+        }
+
+drop:
+        /* Throw the packet out. */
+        kfree_skb(skb);
+
+        return 0;
+}
+
+/*
+ *  Send cluster message to single interface.
+ */
+static void toi_send_if(int message, unsigned long my_id)
+{
+        struct sk_buff *skb;
+        struct toi_pkt *b;
+        int hh_len = LL_RESERVED_SPACE(net_dev);
+        struct iphdr *h;
+
+        /* Allocate packet */
+        skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
+        if (!skb)
+                return;
+        skb_reserve(skb, hh_len);
+        b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
+        memset(b, 0, sizeof(struct toi_pkt));
+
+        /* Construct IP header */
+        skb_reset_network_header(skb);
+        h = ip_hdr(skb);
+        h->version = 4;
+        h->ihl = 5;
+        h->tot_len = htons(sizeof(struct toi_pkt));
+        h->frag_off = htons(IP_DF);
+        h->ttl = 64;
+        h->protocol = IPPROTO_UDP;
+        h->daddr = htonl(INADDR_BROADCAST);
+        h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+        /* Construct UDP header */
+        b->udph.source = htons(toi_cluster_port_send);
+        b->udph.dest = htons(toi_cluster_port_recv);
+        b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
+        /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+        /* Construct message */
+        b->message = message;
+        b->sid = my_id;
+        b->htype = net_dev->type; /* can cause undefined behavior */
+        b->hlen = net_dev->addr_len;
+        memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
+        b->secs = htons(3); /* 3 seconds */
+
+        /* Chain packet down the line... */
+        skb->dev = net_dev;
+        skb->protocol = htons(ETH_P_IP);
+        if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
+                     net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
+                        dev_queue_xmit(skb) < 0)
+                printk(KERN_INFO "E");
+}
+
+/*        =========================================                */
+
+/*                        kTOICluster                        */
+
+static atomic_t num_cluster_threads;
+static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
+
+static int kTOICluster(void *data)
+{
+        unsigned long my_id;
+
+        my_id = atomic_add_return(1, &num_cluster_threads) - 1;
+        node_array[my_id].current_message = (unsigned long) data;
+
+        PRINTK("kTOICluster daemon %lu starting.\n", my_id);
+
+        current->flags |= PF_NOFREEZE;
+
+        while (node_array[my_id].current_message) {
+                toi_send_if(node_array[my_id].current_message, my_id);
+                sleep_on_timeout(&clusterd_events,
+                                cluster_message_timeout);
+                PRINTK("Link state %lu is %d.\n", my_id,
+                                node_array[my_id].current_message);
+        }
+
+        toi_send_if(MSG_BYE, my_id);
+        atomic_dec(&num_cluster_threads);
+        wake_up(&clusterd_events);
+
+        PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
+        __set_current_state(TASK_RUNNING);
+        return 0;
+}
+
+static void kill_clusterd(void)
+{
+        int i;
+
+        for (i = 0; i < num_local_nodes; i++) {
+                if (node_array[i].current_message) {
+                        PRINTK("Seeking to kill clusterd %d.\n", i);
+                        node_array[i].current_message = 0;
+                }
+        }
+        wait_event(clusterd_events,
+                        !atomic_read(&num_cluster_threads));
+        PRINTK("All cluster daemons have exited.\n");
+}
+
+static int peers_not_in_message(int index, int message, int precise)
+{
+        struct cluster_member *this;
+        unsigned long flags;
+        int result = 0;
+
+        spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+        list_for_each_entry(this, &node_array[index].member_list, list) {
+                if (this->ignore)
+                        continue;
+
+                PRINTK("Peer %d.%d.%d.%d sending %s. "
+                        "Seeking %s.\n",
+                        NIPQUAD(this->addr),
+                        str_message(this->message), str_message(message));
+                if ((precise ? this->message :
+                                        this->message & MSG_STATE_MASK) !=
+                                        message)
+                        result++;
+        }
+        spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+        PRINTK("%d peers not yet in sought message.\n", result);
+        return result;
+}
+
+static void reset_ignored(int index)
+{
+        struct cluster_member *this;
+        unsigned long flags;
+
+        spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+        list_for_each_entry(this, &node_array[index].member_list, list)
+                this->ignore = 0;
+        node_array[index].ignored_peer_count = 0;
+        spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+static int peers_in_message(int index, int message, int precise)
+{
+        return node_array[index].peer_count -
+                node_array[index].ignored_peer_count -
+                peers_not_in_message(index, message, precise);
+}
+
+static int time_to_continue(int index, unsigned long start, int message)
+{
+        int first = peers_not_in_message(index, message, 0);
+        int second = peers_in_message(index, message, 1);
+
+        PRINTK("First part returns %d, second returns %d.\n", first, second);
+
+        if (!first && !second) {
+                PRINTK("All peers answered message %d.\n",
+                        message);
+                return 1;
+        }
+
+        if (time_after(jiffies, start + continue_delay)) {
+                PRINTK("Timeout reached.\n");
+                return 1;
+        }
+
+        PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
+                        start + continue_delay);
+        return 0;
+}
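+
+/*
+ * Put differently: we move on once every non-ignored peer is in the state
+ * carried by 'message' and none of them is still showing the bare request
+ * (i.e. all have added MSG_ACK or MSG_NACK), or once continue_delay has
+ * elapsed since 'start'.
+ */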
+
+void toi_initiate_cluster_hibernate(void)
+{
+        int result;
+        unsigned long start;
+
+        result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+        if (result)
+                return;
+
+        toi_send_if(MSG_HIBERNATE, 0);
+
+        start = jiffies;
+        wait_event(node_array[0].member_events,
+                        time_to_continue(0, start, MSG_HIBERNATE));
+
+        if (test_action_state(TOI_FREEZER_TEST)) {
+                toi_send_if(MSG_ABORT, 0);
+
+                start = jiffies;
+                wait_event(node_array[0].member_events,
+                        time_to_continue(0, start, MSG_RUNNING));
+
+                do_toi_step(STEP_QUIET_CLEANUP);
+                return;
+        }
+
+        toi_send_if(MSG_IO, 0);
+
+        result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+        if (result)
+                return;
+
+        /* This code runs at resume time too! */
+        if (toi_in_hibernate)
+                result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+}
+
+/* toi_cluster_print_debug_stats
+ *
+ * Description:        Print information to be recorded for debugging purposes into a
+ *                 buffer.
+ * Arguments:        buffer: Pointer to a buffer into which the debug info will be
+ *                         printed.
+ *                 size:        Size of the buffer.
+ * Returns:        Number of characters written to the buffer.
+ */
+static int toi_cluster_print_debug_stats(char *buffer, int size)
+{
+        int len;
+
+        if (strlen(toi_cluster_iface))
+                len = scnprintf(buffer, size,
+                                "- Cluster interface is '%s'.\n",
+                                toi_cluster_iface);
+        else
+                len = scnprintf(buffer, size,
+                                "- Cluster support is disabled.\n");
+        return len;
+}
+
+/* cluster_memory_needed
+ *
+ * Description:        Tell the caller how much memory we need to operate during
+ *                 hibernate/resume.
+ * Returns:        Unsigned long. Maximum number of bytes of memory required for
+ *                 operation.
+ */
+static int toi_cluster_memory_needed(void)
+{
+        return 0;
+}
+
+static int toi_cluster_storage_needed(void)
+{
+        return 1 + strlen(toi_cluster_iface);
+}
+
+/* toi_cluster_save_config_info
+ *
+ * Description:        Save information needed when reloading the image at resume time.
+ * Arguments:        Buffer:                Pointer to a buffer of size PAGE_SIZE.
+ * Returns:        Number of bytes used for saving our data.
+ */
+static int toi_cluster_save_config_info(char *buffer)
+{
+        strcpy(buffer, toi_cluster_iface);
+        return strlen(toi_cluster_iface) + 1;
+}
+
+/* toi_cluster_load_config_info
+ *
+ * Description:        Reload information needed for declustering the image at
+ *                 resume time.
+ * Arguments:        Buffer:                Pointer to the start of the data.
+ *                Size:                Number of bytes that were saved.
+ */
+static void toi_cluster_load_config_info(char *buffer, int size)
+{
+        strncpy(toi_cluster_iface, buffer, size);
+        return;
+}
+
+static void cluster_startup(void)
+{
+        int have_image = do_check_can_resume(), i;
+        unsigned long start = jiffies, initial_message;
+        struct task_struct *p;
+
+        initial_message = MSG_IMAGE;
+
+        have_image = 1;
+
+        for (i = 0; i < num_local_nodes; i++) {
+                PRINTK("Starting ktoiclusterd %d.\n", i);
+                p = kthread_create(kTOICluster, (void *) initial_message,
+                                "ktoiclusterd/%d", i);
+                if (IS_ERR(p)) {
+                        printk(KERN_ERR "Failed to start ktoiclusterd.\n");
+                        return;
+                }
+
+                wake_up_process(p);
+        }
+
+        /* Wait for delay or someone else sending first message */
+        wait_event(node_array[0].member_events, time_to_continue(0, start,
+                                MSG_IMAGE));
+
+        others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
+
+        printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
+                " %d.\n", have_image ? "" : "don't ", others_have_image);
+
+        if (have_image) {
+                int result;
+
+                /* Start to resume */
+                printk(KERN_INFO "  === Starting to resume ===  \n");
+                node_array[0].current_message = MSG_IO;
+                toi_send_if(MSG_IO, 0);
+
+                /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
+                result = 0;
+
+                if (!result) {
+                        /*
+                         * Atomic restore - we'll come back in the hibernation
+                         * path.
+                         */
+
+                        /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
+                        result = 0;
+
+                        /* do_toi_step(STEP_QUIET_CLEANUP); */
+                }
+
+                node_array[0].current_message |= MSG_NACK;
+
+                /* For debugging - disable for real life? */
+                wait_event(node_array[0].member_events,
+                                time_to_continue(0, start, MSG_IO));
+        }
+
+        if (others_have_image) {
+                /* Wait for them to resume */
+                printk(KERN_INFO "Waiting for other nodes to resume.\n");
+                start = jiffies;
+                wait_event(node_array[0].member_events,
+                                time_to_continue(0, start, MSG_RUNNING));
+                if (peers_not_in_message(0, MSG_RUNNING, 0))
+                        printk(KERN_INFO "Timed out while waiting for other "
+                                        "nodes to resume.\n");
+        }
+
+        /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
+         * as appropriate.
+         *
+         * If we don't have an image:
+         * - Wait until someone else says they have one, or conditions are met
+         *   for continuing to boot (n machines or t seconds).
+         * - If anyone has an image, wait for them to resume before continuing
+         *   to boot.
+         *
+         * If we have an image:
+         * - Wait until conditions are met before continuing to resume (n
+         *   machines or t seconds). Send RESUME_PREP and freeze processes.
+         *   NACK_PREP if freezing fails (shouldn't) and follow logic for
+         *   us having no image above. On success, wait for [N]ACK_PREP from
+         *   other machines. Read image (including atomic restore) until done.
+         *   Wait for ACK_READ from others (should never fail). Thaw processes
+         *   and do post-resume. (The section after the atomic restore is done
+         *   via the code for hibernating).
+         */
+
+        node_array[0].current_message = MSG_RUNNING;
+}
+
+/* toi_cluster_open_iface
+ *
+ * Description:        Prepare to use an interface.
+ */
+
+static int toi_cluster_open_iface(void)
+{
+        struct net_device *dev;
+
+        rtnl_lock();
+
+        for_each_netdev(&init_net, dev) {
+                if (/* dev == &init_net.loopback_dev || */
+                    strcmp(dev->name, toi_cluster_iface))
+                        continue;
+
+                net_dev = dev;
+                break;
+        }
+
+        rtnl_unlock();
+
+        if (!net_dev) {
+                printk(KERN_ERR MYNAME ": Device %s not found.\n",
+                                toi_cluster_iface);
+                return -ENODEV;
+        }
+
+        dev_add_pack(&toi_cluster_packet_type);
+        added_pack = 1;
+
+        loopback_mode = (net_dev == init_net.loopback_dev);
+        num_local_nodes = loopback_mode ? 8 : 1;
+
+        PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
+                        loopback_mode ? "on" : "off", num_local_nodes);
+
+        cluster_startup();
+        return 0;
+}
+
+/* toi_cluster_close_iface
+ *
+ * Description: Stop using an interface.
+ */
+
+static int toi_cluster_close_iface(void)
+{
+        kill_clusterd();
+        if (added_pack) {
+                dev_remove_pack(&toi_cluster_packet_type);
+                added_pack = 0;
+        }
+        return 0;
+}
+
+static void write_side_effect(void)
+{
+        if (toi_cluster_ops.enabled) {
+                toi_cluster_open_iface();
+                set_toi_state(TOI_CLUSTER_MODE);
+        } else {
+                toi_cluster_close_iface();
+                clear_toi_state(TOI_CLUSTER_MODE);
+        }
+}
+
+static void node_write_side_effect(void)
+{
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
+                        NULL),
+        SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
+                        write_side_effect),
+        SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
+        SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
+                        255, 0, NULL),
+        SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
+                        255, 0, NULL),
+        SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
+                        0)
+};
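+
+/*
+ * These entries are expected to appear under the module's "cluster"
+ * directory in the TuxOnIce sysfs tree (e.g. something like
+ * /sys/power/tuxonice/cluster/interface); writing an interface name there
+ * and setting "enabled" triggers write_side_effect() above, which opens the
+ * interface and switches the kernel into cluster mode.
+ */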
+
+/*
+ * Ops structure.
+ */
+
+static struct toi_module_ops toi_cluster_ops = {
+        .type                        = FILTER_MODULE,
+        .name                        = "Cluster",
+        .directory                = "cluster",
+        .module                        = THIS_MODULE,
+        .memory_needed                 = toi_cluster_memory_needed,
+        .print_debug_info        = toi_cluster_print_debug_stats,
+        .save_config_info        = toi_cluster_save_config_info,
+        .load_config_info        = toi_cluster_load_config_info,
+        .storage_needed                = toi_cluster_storage_needed,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+#ifdef MODULE
+#define INIT static __init
+#define EXIT static __exit
+#else
+#define INIT
+#define EXIT
+#endif
+
+INIT int toi_cluster_init(void)
+{
+        int temp = toi_register_module(&toi_cluster_ops), i;
+        struct kobject *kobj = toi_cluster_ops.dir_kobj;
+
+        for (i = 0; i < MAX_LOCAL_NODES; i++) {
+                node_array[i].current_message = 0;
+                INIT_LIST_HEAD(&node_array[i].member_list);
+                init_waitqueue_head(&node_array[i].member_events);
+                spin_lock_init(&node_array[i].member_list_lock);
+                spin_lock_init(&node_array[i].receive_lock);
+
+                /* Set up sysfs entry */
+                node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
+                                sizeof(node_array[i].sysfs_data.attr.name),
+                                GFP_KERNEL);
+                sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d",
+                                i);
+                node_array[i].sysfs_data.attr.mode = SYSFS_RW;
+                node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
+                node_array[i].sysfs_data.flags = 0;
+                node_array[i].sysfs_data.data.integer.variable =
+                        (int *) &node_array[i].current_message;
+                node_array[i].sysfs_data.data.integer.minimum = 0;
+                node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
+                node_array[i].sysfs_data.write_side_effect =
+                        node_write_side_effect;
+                toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
+        }
+
+        toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
+
+        if (toi_cluster_ops.enabled)
+                toi_cluster_open_iface();
+
+        return temp;
+}
+
+EXIT void toi_cluster_exit(void)
+{
+        int i;
+        toi_cluster_close_iface();
+
+        for (i = 0; i < MAX_LOCAL_NODES; i++)
+                toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
+                                &node_array[i].sysfs_data);
+        toi_unregister_module(&toi_cluster_ops);
+}
+
+static int __init toi_cluster_iface_setup(char *iface)
+{
+        toi_cluster_ops.enabled = (*iface &&
+                        strcmp(iface, "off"));
+
+        if (toi_cluster_ops.enabled)
+                strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
+
+        return 1;
+}
+
+__setup("toi_cluster=", toi_cluster_iface_setup);
diff -Nur linux-4.2/kernel/power/tuxonice_cluster.h linux-4.2-pck/kernel/power/tuxonice_cluster.h
--- linux-4.2/kernel/power/tuxonice_cluster.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_cluster.h	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_cluster.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_TOI_CLUSTER
+extern int toi_cluster_init(void);
+extern void toi_cluster_exit(void);
+extern void toi_initiate_cluster_hibernate(void);
+#else
+static inline int toi_cluster_init(void) { return 0; }
+static inline void toi_cluster_exit(void) { }
+static inline void toi_initiate_cluster_hibernate(void) { }
+#endif
+
diff -Nur linux-4.2/kernel/power/tuxonice_compress.c linux-4.2-pck/kernel/power/tuxonice_compress.c
--- linux-4.2/kernel/power/tuxonice_compress.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_compress.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,452 @@
+/*
+ * kernel/power/tuxonice_compress.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data compression routines for TuxOnIce,
+ * using cryptoapi.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+static int toi_expected_compression;
+
+static struct toi_module_ops toi_compression_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_compressor_name[32] = "lzo";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+        u8 *page_buffer;
+        struct crypto_comp *transform;
+        unsigned int len;
+        u8 *buffer_start;
+        u8 *output_buffer;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
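+/* Scratch output buffer is two pages, presumably to leave headroom for the
+ * occasional page that grows rather than shrinks under compression. */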
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_compress_crypto_prepare(void)
+{
+        int cpu;
+
+        if (!*toi_compressor_name) {
+                printk(KERN_INFO "TuxOnIce: Compression enabled but no "
+                                "compressor name set.\n");
+                return 1;
+        }
+
+        for_each_online_cpu(cpu) {
+                struct cpu_context *this = &per_cpu(contexts, cpu);
+                this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
+                if (IS_ERR(this->transform)) {
+                        printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+                                        "%s compression transform.\n",
+                                        toi_compressor_name);
+                        this->transform = NULL;
+                        return 1;
+                }
+
+                this->page_buffer =
+                        (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
+
+                if (!this->page_buffer) {
+                        printk(KERN_ERR
+                          "Failed to allocate a page buffer for TuxOnIce "
+                          "compression driver.\n");
+                        return -ENOMEM;
+                }
+
+                this->output_buffer =
+                        (char *) vmalloc_32(OUT_BUF_SIZE);
+
+                if (!this->output_buffer) {
+                        printk(KERN_ERR
+                          "Failed to allocate an output buffer for TuxOnIce "
+                          "compression driver.\n");
+                        return -ENOMEM;
+                }
+        }
+
+        return 0;
+}
+
+static int toi_compress_rw_cleanup(int writing)
+{
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                struct cpu_context *this = &per_cpu(contexts, cpu);
+                if (this->transform) {
+                        crypto_free_comp(this->transform);
+                        this->transform = NULL;
+                }
+
+                if (this->page_buffer)
+                        toi_free_page(16, (unsigned long) this->page_buffer);
+
+                this->page_buffer = NULL;
+
+                if (this->output_buffer)
+                        vfree(this->output_buffer);
+
+                this->output_buffer = NULL;
+        }
+
+        return 0;
+}
+
+/*
+ * toi_compress_init
+ */
+
+static int toi_compress_init(int toi_or_resume)
+{
+        if (!toi_or_resume)
+                return 0;
+
+        toi_compress_bytes_in = 0;
+        toi_compress_bytes_out = 0;
+
+        next_driver = toi_get_next_filter(&toi_compression_ops);
+
+        return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_compress_rw_init()
+ */
+
+static int toi_compress_rw_init(int rw, int stream_number)
+{
+        if (toi_compress_crypto_prepare()) {
+                printk(KERN_ERR "Failed to initialise compression "
+                                "algorithm.\n");
+                if (rw == READ) {
+                        printk(KERN_INFO "Unable to read the image.\n");
+                        return -ENODEV;
+                } else {
+                        printk(KERN_INFO "Continuing without "
+                                "compressing the image.\n");
+                        toi_compression_ops.enabled = 0;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * toi_compress_write_page()
+ *
+ * Compress a page of data, buffering output and passing on filled
+ * pages to the next module in the pipeline.
+ *
+ * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be compressed.
+ *
+ * Returns:        0 on success. Otherwise the error is that returned by later
+ *                 modules, -ECHILD if we have a broken pipeline or -EIO if
+ *                 the compression transform fails.
+ */
+static int toi_compress_write_page(unsigned long index, int buf_type,
+                void *buffer_page, unsigned int buf_size)
+{
+        int ret = 0, cpu = smp_processor_id();
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+        u8* output_buffer = buffer_page;
+        int output_len = buf_size;
+        int out_buf_type = buf_type;
+
+        if (ctx->transform) {
+
+                ctx->buffer_start = TOI_MAP(buf_type, buffer_page);
+                ctx->len = OUT_BUF_SIZE;
+
+                ret = crypto_comp_compress(ctx->transform,
+                        ctx->buffer_start, buf_size,
+                        ctx->output_buffer, &ctx->len);
+
+                TOI_UNMAP(buf_type, buffer_page);
+
+                toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+                                "CPU %d, index %lu: %d bytes",
+                                cpu, index, ctx->len);
+
+                if (!ret && ctx->len < buf_size) { /* some compression */
+                        output_buffer = ctx->output_buffer;
+                        output_len = ctx->len;
+                        out_buf_type = TOI_VIRT;
+                }
+
+        }
+
+        mutex_lock(&stats_lock);
+
+        toi_compress_bytes_in += buf_size;
+        toi_compress_bytes_out += output_len;
+
+        mutex_unlock(&stats_lock);
+
+        if (!ret)
+                ret = next_driver->write_page(index, out_buf_type,
+                                output_buffer, output_len);
+
+        return ret;
+}
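+
+/*
+ * Note on the fallback above: when compression fails to shrink a page (or no
+ * transform is available), the original PAGE_SIZE buffer is passed downstream
+ * untouched; toi_compress_read_page() below relies on this, treating any
+ * block that reads back as exactly PAGE_SIZE bytes as uncompressed data.
+ */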
+
+/*
+ * toi_compress_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules and decompress it until the input buffer
+ * is filled.
+ * Returns: Zero if successful. Error condition from this module or from
+ * downstream on failure.
+ */
+static int toi_compress_read_page(unsigned long *index, int buf_type,
+                void *buffer_page, unsigned int *buf_size)
+{
+        int ret, cpu = smp_processor_id();
+        unsigned int len;
+        unsigned int outlen = PAGE_SIZE;
+        char *buffer_start;
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+        if (!ctx->transform)
+                return next_driver->read_page(index, TOI_PAGE, buffer_page,
+                                buf_size);
+
+        /*
+         * All our reads must be synchronous - we can't decompress
+         * data that hasn't been read yet.
+         */
+
+        ret = next_driver->read_page(index, TOI_VIRT, ctx->page_buffer, &len);
+
+        buffer_start = TOI_MAP(buf_type, buffer_page);
+
+        /* Error or uncompressed data */
+        if (ret || len == PAGE_SIZE) {
+                memcpy(buffer_start, ctx->page_buffer, len);
+                goto out;
+        }
+
+        ret = crypto_comp_decompress(
+                        ctx->transform,
+                        ctx->page_buffer,
+                        len, buffer_start, &outlen);
+
+        toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+                        "CPU %d, index %lu: %d=>%d (%d).",
+                        cpu, *index, len, outlen, ret);
+
+        if (ret)
+                abort_hibernate(TOI_FAILED_IO,
+                        "Compress_read returned %d.\n", ret);
+        else if (outlen != PAGE_SIZE) {
+                abort_hibernate(TOI_FAILED_IO,
+                        "Decompression yielded %d bytes instead of %ld.\n",
+                        outlen, PAGE_SIZE);
+                printk(KERN_ERR "Decompression yielded %d bytes instead of "
+                                "%ld.\n", outlen, PAGE_SIZE);
+                ret = -EIO;
+                *buf_size = outlen;
+        }
+out:
+        TOI_UNMAP(buf_type, buffer_page);
+        return ret;
+}
+
+/*
+ * toi_compress_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_compress_print_debug_stats(char *buffer, int size)
+{
+        unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
+                      pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
+        int len;
+
+        /* Output the compression ratio achieved. */
+        if (*toi_compressor_name)
+                len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
+                                toi_compressor_name);
+        else
+                len = scnprintf(buffer, size, "- Compressor is not set.\n");
+
+        if (pages_in)
+                len += scnprintf(buffer+len, size - len, "  Compressed "
+                        "%lu bytes into %lu (%ld percent compression).\n",
+                  toi_compress_bytes_in,
+                  toi_compress_bytes_out,
+                  (pages_in - pages_out) * 100 / pages_in);
+        return len;
+}
+
+/*
+ * toi_compress_compression_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Unsigned long. Maximum number of bytes of memory required for
+ * operation.
+ */
+static int toi_compress_memory_needed(void)
+{
+        return 2 * PAGE_SIZE;
+}
+
+static int toi_compress_storage_needed(void)
+{
+        return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+                strlen(toi_compressor_name) + 1;
+}
+
+/*
+ * toi_compress_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_compress_save_config_info(char *buffer)
+{
+        int len = strlen(toi_compressor_name) + 1, offset = 0;
+
+        *((unsigned long *) buffer) = toi_compress_bytes_in;
+        offset += sizeof(unsigned long);
+        *((unsigned long *) (buffer + offset)) = toi_compress_bytes_out;
+        offset += sizeof(unsigned long);
+        *((int *) (buffer + offset)) = toi_expected_compression;
+        offset += sizeof(int);
+        *((int *) (buffer + offset)) = len;
+        offset += sizeof(int);
+        strncpy(buffer + offset, toi_compressor_name, len);
+        return offset + len;
+}
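+
+/*
+ * Resulting layout of the config block (matching the offsets above):
+ *
+ *   unsigned long  toi_compress_bytes_in
+ *   unsigned long  toi_compress_bytes_out
+ *   int            toi_expected_compression
+ *   int            len  (strlen(toi_compressor_name) + 1)
+ *   len bytes      toi_compressor_name, including the trailing NUL
+ *
+ * toi_compress_load_config_info() below unpacks it in the same order.
+ */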
+
+/* toi_compress_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for decompressing the image at
+ * resume time.
+ */
+static void toi_compress_load_config_info(char *buffer, int size)
+{
+        int len, offset = 0;
+
+        toi_compress_bytes_in = *((unsigned long *) buffer);
+        offset += sizeof(unsigned long);
+        toi_compress_bytes_out = *((unsigned long *) (buffer + offset));
+        offset += sizeof(unsigned long);
+        toi_expected_compression = *((int *) (buffer + offset));
+        offset += sizeof(int);
+        len = *((int *) (buffer + offset));
+        offset += sizeof(int);
+        strncpy(toi_compressor_name, buffer + offset, len);
+}
+
+static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+        bkd->compress_bytes_in = toi_compress_bytes_in;
+        bkd->compress_bytes_out = toi_compress_bytes_out;
+}
+
+static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+        toi_compress_bytes_in = bkd->compress_bytes_in;
+        toi_compress_bytes_out = bkd->compress_bytes_out;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Description:        Returns the expected ratio between data passed into this module
+ *                 and the amount of data output when writing.
+ * Returns:        100 if the module is disabled. Otherwise the value set by the
+ *                 user via our sysfs entry.
+ */
+
+static int toi_compress_expected_ratio(void)
+{
+        if (!toi_compression_ops.enabled)
+                return 100;
+        else
+                return 100 - toi_expected_compression;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
+                        0, 99, 0, NULL),
+        SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
+                        NULL),
+        SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
+};
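+
+/*
+ * For example, assuming the usual TuxOnIce sysfs layout, the compressor can
+ * be switched with something like
+ *   echo deflate > /sys/power/tuxonice/compression/algorithm
+ * and the expected ratio tuned via the "expected_compression" entry; the
+ * exact paths depend on how tuxonice_sysfs.c registers the tree.
+ */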
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_compression_ops = {
+        .type                        = FILTER_MODULE,
+        .name                        = "compression",
+        .directory                = "compression",
+        .module                        = THIS_MODULE,
+        .initialise                = toi_compress_init,
+        .memory_needed                 = toi_compress_memory_needed,
+        .print_debug_info        = toi_compress_print_debug_stats,
+        .save_config_info        = toi_compress_save_config_info,
+        .load_config_info        = toi_compress_load_config_info,
+        .storage_needed                = toi_compress_storage_needed,
+        .expected_compression        = toi_compress_expected_ratio,
+
+        .pre_atomic_restore        = toi_compress_pre_atomic_restore,
+        .post_atomic_restore        = toi_compress_post_atomic_restore,
+
+        .rw_init                = toi_compress_rw_init,
+        .rw_cleanup                = toi_compress_rw_cleanup,
+
+        .write_page                = toi_compress_write_page,
+        .read_page                = toi_compress_read_page,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_compress_load(void)
+{
+        return toi_register_module(&toi_compression_ops);
+}
+
+late_initcall(toi_compress_load);
diff -Nur linux-4.2/kernel/power/tuxonice_copy_before_write.c linux-4.2-pck/kernel/power/tuxonice_copy_before_write.c
--- linux-4.2/kernel/power/tuxonice_copy_before_write.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_copy_before_write.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,240 @@
+/*
+ * kernel/power/tuxonice_copy_before_write.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines (apart from the fault handling code) to deal with allocating memory
+ * for copying pages before they are modified, restoring the contents and getting
+ * the contents written to disk.
+ */
+
+#include <linux/percpu-defs.h>
+#include <linux/sched.h>
+#include <linux/tuxonice.h>
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+DEFINE_PER_CPU(struct toi_cbw_state, toi_cbw_states);
+#define CBWS_PER_PAGE (PAGE_SIZE / sizeof(struct toi_cbw))
+#define toi_cbw_pool_size 100
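+
+/*
+ * Pool sketch: _toi_allocate_cbw_data() below fills each per-CPU state with
+ * whole pages of struct toi_cbw entries, CBWS_PER_PAGE at a time, linked
+ * through ->next, and keeps going until at least toi_cbw_pool_size entries
+ * (each owning one spare data page in ->virt) are available.
+ */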
+
+static void _toi_free_cbw_data(struct toi_cbw_state *state)
+{
+    struct toi_cbw *page_ptr, *ptr, *next;
+
+    page_ptr = ptr = state->first;
+
+    while(ptr) {
+        next = ptr->next;
+
+        if (ptr->virt) {
+            toi__free_page(40, virt_to_page(ptr->virt));
+        }
+        if ((((unsigned long) ptr) & PAGE_MASK) != (unsigned long) page_ptr) {
+            /* Must be on a new page - free the previous one. */
+            toi__free_page(40, virt_to_page(page_ptr));
+            page_ptr = ptr;
+        }
+        ptr = next;
+    }
+
+    if (page_ptr) {
+        toi__free_page(40, virt_to_page(page_ptr));
+    }
+
+    state->first = state->next = state->last = NULL;
+    state->size = 0;
+}
+
+void toi_free_cbw_data(void)
+{
+    int i;
+
+    for_each_online_cpu(i) {
+        struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+        if (!state->first)
+            continue;
+
+        state->enabled = 0;
+
+        while (state->active) {
+            schedule();
+        }
+
+        _toi_free_cbw_data(state);
+    }
+}
+
+static int _toi_allocate_cbw_data(struct toi_cbw_state *state)
+{
+    while(state->size < toi_cbw_pool_size) {
+        int i;
+        struct toi_cbw *ptr;
+
+        ptr = (struct toi_cbw *) toi_get_zeroed_page(40, GFP_KERNEL);
+
+        if (!ptr) {
+            return -ENOMEM;
+        }
+
+        if (!state->first) {
+            state->first = state->next = state->last = ptr;
+        }
+
+        for (i = 0; i < CBWS_PER_PAGE; i++) {
+            struct toi_cbw *cbw = &ptr[i];
+
+            cbw->virt = (char *) toi_get_zeroed_page(40, GFP_KERNEL);
+            if (!cbw->virt) {
+                state->size += i;
+                printk("Out of memory allocating CBW pages.\n");
+                return -ENOMEM;
+            }
+
+            if (cbw == state->first)
+                continue;
+
+            state->last->next = cbw;
+            state->last = cbw;
+        }
+
+        state->size += CBWS_PER_PAGE;
+    }
+
+    state->enabled = 1;
+
+    return 0;
+}
+
+
+int toi_allocate_cbw_data(void)
+{
+    int i, result;
+
+    for_each_online_cpu(i) {
+        struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+        result = _toi_allocate_cbw_data(state);
+
+        if (result)
+            return result;
+    }
+
+    return 0;
+}
+
+void toi_cbw_restore(void)
+{
+    if (!toi_keeping_image)
+        return;
+
+}
+
+void toi_cbw_write(void)
+{
+    if (!toi_keeping_image)
+        return;
+
+}
+
+/**
+ * toi_cbw_test_read - Test copy before write on one page
+ *
+ * Allocate copy before write buffers, then make one page only copy-before-write
+ * and attempt to write to it. We should then be able to retrieve the original
+ * version from the cbw buffer and the modified version from the page itself.
+ */
+static int toi_cbw_test_read(const char *buffer, int count)
+{
+    unsigned long virt = toi_get_zeroed_page(40, GFP_KERNEL);
+    char *original = "Original contents";
+    char *modified = "Modified material";
+    struct page *page = virt_to_page(virt);
+    int i, len = 0, found = 0, pfn = page_to_pfn(page);
+
+    if (!virt) {
+        printk("toi_cbw_test_read: Unable to allocate a page for testing.\n");
+        return -ENOMEM;
+    }
+
+    memcpy((char *) virt, original, strlen(original));
+
+    if (toi_allocate_cbw_data()) {
+        printk("toi_cbw_test_read: Unable to allocate cbw data.\n");
+        return -ENOMEM;
+    }
+
+    toi_reset_dirtiness_one(pfn, 0);
+
+    SetPageTOI_CBW(page);
+
+    memcpy((char *) virt, modified, strlen(modified));
+
+    if (strncmp((char *) virt, modified, strlen(modified))) {
+        len += sprintf((char *) buffer + len, "Failed to write to page after protecting it.\n");
+    }
+
+    for_each_online_cpu(i) {
+        struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+        struct toi_cbw *ptr = state->first, *last_ptr = ptr;
+
+        if (!found) {
+            while (ptr) {
+                if (ptr->pfn == pfn) {
+                    found = 1;
+                    if (strncmp(ptr->virt, original, strlen(original))) {
+                        len += sprintf((char *) buffer + len, "Contents of original buffer are not original.\n");
+                    } else {
+                        len += sprintf((char *) buffer + len, "Test passed. Buffer changed and original contents preserved.\n");
+                    }
+                    break;
+                }
+
+                last_ptr = ptr;
+                ptr = ptr->next;
+            }
+        }
+
+        if (!last_ptr)
+            len += sprintf((char *) buffer + len, "All available CBW buffers on cpu %d used.\n", i);
+    }
+
+    if (!found)
+        len += sprintf((char *) buffer + len, "Copy before write buffer not found.\n");
+
+    toi_free_cbw_data();
+
+    return len;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_CUSTOM("test", SYSFS_RW, toi_cbw_test_read,
+                        NULL, SYSFS_NEEDS_SM_FOR_READ, NULL),
+};
+
+static struct toi_module_ops toi_cbw_ops = {
+        .type                                        = MISC_HIDDEN_MODULE,
+        .name                                        = "copy_before_write debugging",
+        .directory                                = "cbw",
+        .module                                        = THIS_MODULE,
+        .early                                        = 1,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+int toi_cbw_init(void)
+{
+        int result = toi_register_module(&toi_cbw_ops);
+        return result;
+}
diff -Nur linux-4.2/kernel/power/tuxonice_extent.c linux-4.2-pck/kernel/power/tuxonice_extent.c
--- linux-4.2/kernel/power/tuxonice_extent.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_extent.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,144 @@
+/*
+ * kernel/power/tuxonice_extent.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * These functions encapsulate the manipulation of storage metadata.
+ */
+
+#include <linux/suspend.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+
+/**
+ * toi_get_extent - return a free extent
+ *
+ * May fail, returning NULL instead.
+ **/
+static struct hibernate_extent *toi_get_extent(void)
+{
+        return (struct hibernate_extent *) toi_kzalloc(2,
+                        sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
+}
+
+/**
+ * toi_put_extent_chain_from - free a chain of extents starting from value 'from'
+ * @chain:        Chain to free.
+ * @from:        First extent value to free.
+ *
+ * Note that 'from' is an extent value, and may be part way through an extent.
+ * In this case, the extent should be truncated (if necessary) and following
+ * extents freed.
+ **/
+void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from)
+{
+        struct hibernate_extent *this;
+
+        this = chain->first;
+
+        while (this) {
+                struct hibernate_extent *next = this->next;
+
+                // Delete the whole extent?
+                if (this->start >= from) {
+                    chain->size -= (this->end - this->start + 1);
+                    if (chain->first == this)
+                        chain->first = next;
+                    if (chain->last_touched == this)
+                        chain->last_touched = NULL;
+                    if (chain->current_extent == this)
+                        chain->current_extent = NULL;
+                    toi_kfree(2, this, sizeof(*this));
+                    chain->num_extents--;
+                } else if (this->end >= from) {
+                    // Truncate the extent, keeping [start, from - 1]
+                    chain->size -= (this->end - from + 1);
+                    this->end = from - 1;
+                }
+                this = next;
+        }
+}
+
+/**
+ * toi_put_extent_chain - free a whole chain of extents
+ * @chain:        Chain to free.
+ **/
+void toi_put_extent_chain(struct hibernate_extent_chain *chain)
+{
+    toi_put_extent_chain_from(chain, 0);
+}
+
+/**
+ * toi_add_to_extent_chain - add an extent to an existing chain
+ * @chain:        Chain to which the extent should be added
+ * @start:        Start of the extent (first physical block)
+ * @end:        End of the extent (last physical block)
+ *
+ * The chain information is updated if the insertion is successful.
+ **/
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+                unsigned long start, unsigned long end)
+{
+        struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0,
+                "Adding extent %lu-%lu to chain %p.\n", start, end, chain);
+
+        /* Find the right place in the chain */
+        if (chain->last_touched && chain->last_touched->start < start)
+                cur_ext = chain->last_touched;
+        else if (chain->first && chain->first->start < start)
+                cur_ext = chain->first;
+
+        if (cur_ext) {
+                while (cur_ext->next && cur_ext->next->start < start)
+                        cur_ext = cur_ext->next;
+
+                if (cur_ext->end == (start - 1)) {
+                        struct hibernate_extent *next_ext = cur_ext->next;
+                        cur_ext->end = end;
+
+                        /* Merge with the following one? */
+                        if (next_ext && cur_ext->end + 1 == next_ext->start) {
+                                cur_ext->end = next_ext->end;
+                                cur_ext->next = next_ext->next;
+                                toi_kfree(2, next_ext, sizeof(*next_ext));
+                                chain->num_extents--;
+                        }
+
+                        chain->last_touched = cur_ext;
+                        chain->size += (end - start + 1);
+
+                        return 0;
+                }
+        }
+
+        new_ext = toi_get_extent();
+        if (!new_ext) {
+                printk(KERN_INFO "Error: unable to append a new extent to the "
+                                "chain.\n");
+                return -ENOMEM;
+        }
+
+        chain->num_extents++;
+        chain->size += (end - start + 1);
+        new_ext->start = start;
+        new_ext->end = end;
+
+        chain->last_touched = new_ext;
+
+        if (cur_ext) {
+                new_ext->next = cur_ext->next;
+                cur_ext->next = new_ext;
+        } else {
+                if (chain->first)
+                        new_ext->next = chain->first;
+                chain->first = new_ext;
+        }
+
+        return 0;
+}
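+
+/*
+ * Worked example (illustrative only): adding 10-19, then 20-29, then 40-49
+ * to an empty chain leaves two extents, 10-29 and 40-49, because 20
+ * immediately follows 19 and the first two are merged; chain->size ends up
+ * as 30 and chain->num_extents as 2.
+ */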
diff -Nur linux-4.2/kernel/power/tuxonice_extent.h linux-4.2-pck/kernel/power/tuxonice_extent.h
--- linux-4.2/kernel/power/tuxonice_extent.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_extent.h	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_extent.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations related to extents. Extents are
+ * TuxOnIce's method of storing some of the metadata for the image.
+ * See tuxonice_extent.c for more info.
+ *
+ */
+
+#include "tuxonice_modules.h"
+
+#ifndef EXTENT_H
+#define EXTENT_H
+
+struct hibernate_extent {
+        unsigned long start, end;
+        struct hibernate_extent *next;
+};
+
+struct hibernate_extent_chain {
+        unsigned long size; /* size of the chain, ie. sum of (end - start + 1) */
+        int num_extents;
+        struct hibernate_extent *first, *last_touched;
+        struct hibernate_extent *current_extent;
+        unsigned long current_offset;
+};
+
+/* Simplify iterating through all the values in an extent chain */
+#define toi_extent_for_each(extent_chain, extentpointer, value) \
+if ((extent_chain)->first) \
+        for ((extentpointer) = (extent_chain)->first, (value) = \
+                        (extentpointer)->start; \
+             ((extentpointer) && ((extentpointer)->next || (value) <= \
+                                 (extentpointer)->end)); \
+             (((value) == (extentpointer)->end) ? \
+                ((extentpointer) = (extentpointer)->next, (value) = \
+                 ((extentpointer) ? (extentpointer)->start : 0)) : \
+                        (value)++))
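+
+/*
+ * Illustrative usage (not part of the original interface): with a populated
+ * chain, iteration looks like
+ *
+ *        struct hibernate_extent *ext;
+ *        unsigned long value;
+ *
+ *        toi_extent_for_each(&chain, ext, value)
+ *                handle_block(value);
+ *
+ * where handle_block() is a stand-in for whatever the caller does with each
+ * value in the chain.
+ */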
+
+extern void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from);
+#endif
diff -Nur linux-4.2/kernel/power/tuxonice_file.c linux-4.2-pck/kernel/power/tuxonice_file.c
--- linux-4.2/kernel/power/tuxonice_file.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_file.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,484 @@
+/*
+ * kernel/power/tuxonice_file.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of a simple file as a
+ * backing store. It is based upon the swapallocator, and shares the
+ * same basic workings. Here, though, we have nothing to do with
+ * swapspace, and only one device to worry about.
+ *
+ * The user can just
+ *
+ * echo TuxOnIce > /path/to/my_file
+ *
+ * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
+ *
+ * and
+ *
+ * echo /path/to/my_file > /sys/power/tuxonice/file/target
+ *
+ * then put what they find in /sys/power/tuxonice/resume
+ * as their resume= parameter in lilo.conf (and rerun lilo if using it).
+ *
+ * Having done this, they're ready to hibernate and resume.
+ *
+ * TODO:
+ * - File resizing.
+ */
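+
+/*
+ * For illustration only: judging by test_toi_file_target() below, the value
+ * read back from /sys/power/tuxonice/resume has a form such as
+ * "UUID=<32 hex digits>:0x<sector>" or "/dev/<device>:<sector>", and that
+ * string is what goes after resume= on the kernel command line.
+ */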
+
+#include <linux/blkdev.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+
+#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
+
+static struct toi_module_ops toi_fileops;
+
+static struct file *target_file;
+static struct block_device *toi_file_target_bdev;
+static unsigned long pages_available, pages_allocated;
+static char toi_file_target[256];
+static struct inode *target_inode;
+static int file_target_priority;
+static int used_devt;
+static int target_claim;
+static dev_t toi_file_dev_t;
+static int sig_page_index;
+
+/* For test_toi_file_target */
+static struct toi_bdev_info *file_chain;
+
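+/*
+ * Check whether the fs blocks backing page 'page_num' of the target file are
+ * physically contiguous on disk, by bmap()ing each block of the page in turn.
+ */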
+static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num)
+{
+        int j;
+        sector_t last = 0;
+
+        for (j = 0; j < dev_info->blocks_per_page; j++) {
+                sector_t this = bmap(target_inode,
+                                page_num * dev_info->blocks_per_page + j);
+
+                if (!this || (last && (last + 1) != this))
+                        break;
+
+                last = this;
+        }
+
+        return j == dev_info->blocks_per_page;
+}
+
+static unsigned long get_usable_pages(struct toi_bdev_info *dev_info)
+{
+        unsigned long result = 0;
+        struct block_device *bdev = dev_info->bdev;
+        int i;
+
+        switch (target_inode->i_mode & S_IFMT) {
+        case S_IFSOCK:
+        case S_IFCHR:
+        case S_IFIFO: /* Socket, Char, Fifo */
+                return -1;
+        case S_IFREG: /* Regular file: current size - holes + free
+                         space on part */
+                for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) {
+                        if (has_contiguous_blocks(dev_info, i))
+                                result++;
+                }
+                break;
+        case S_IFBLK: /* Block device */
+                if (!bdev->bd_disk) {
+                        toi_message(TOI_IO, TOI_VERBOSE, 0,
+                                        "bdev->bd_disk null.");
+                        return 0;
+                }
+
+                result = (bdev->bd_part ?
+                        bdev->bd_part->nr_sects :
+                        get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9);
+        }
+
+
+        return result;
+}
+
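+/*
+ * Open the configured target (regular file or block device), work out the
+ * backing block device, fill in a toi_bdev_info describing it and register
+ * that storage with the bio layer.
+ */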
+static int toi_file_register_storage(void)
+{
+        struct toi_bdev_info *devinfo;
+        int result = 0;
+        struct fs_info *fs_info;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage.");
+        if (!strlen(toi_file_target)) {
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: "
+                                "No target filename set.");
+                return 0;
+        }
+
+        target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0);
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.",
+                        toi_file_target, target_file);
+
+        if (IS_ERR(target_file) || !target_file) {
+                target_file = NULL;
+                toi_file_dev_t = name_to_dev_t(toi_file_target);
+                if (!toi_file_dev_t) {
+                        struct kstat stat;
+                        int error = vfs_stat(toi_file_target, &stat);
+                        printk(KERN_INFO "Open file %s returned %p and "
+                                        "name_to_devt failed.\n",
+                                        toi_file_target, target_file);
+                        if (error) {
+                                printk(KERN_INFO "Stat'ing the file also failed."
+                                        " Nothing more we can do.\n");
+                                return 0;
+                        } else
+                                toi_file_dev_t = stat.rdev;
+                }
+
+                toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t);
+                if (IS_ERR(toi_file_target_bdev)) {
+                        printk(KERN_INFO "Got a dev_num (%lx) but failed to "
+                                        "open it.\n",
+                                        (unsigned long) toi_file_dev_t);
+                        toi_file_target_bdev = NULL;
+                        return 0;
+                }
+                used_devt = 1;
+                target_inode = toi_file_target_bdev->bd_inode;
+        } else
+                target_inode = target_file->f_mapping->host;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target.");
+        if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
+            S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
+                printk(KERN_INFO "File support works with regular files,"
+                                " character files and block devices.\n");
+                /* Cleanup routine will undo the above */
+                return 0;
+        }
+
+        if (!used_devt) {
+                if (S_ISBLK(target_inode->i_mode)) {
+                        toi_file_target_bdev = I_BDEV(target_inode);
+                        if (!blkdev_get(toi_file_target_bdev, FMODE_WRITE |
+                                                FMODE_READ, NULL))
+                                target_claim = 1;
+                } else
+                        toi_file_target_bdev = target_inode->i_sb->s_bdev;
+                if (!toi_file_target_bdev) {
+                        printk(KERN_INFO "%s is not a valid file allocator "
+                                        "target.\n", toi_file_target);
+                        return 0;
+                }
+                toi_file_dev_t = toi_file_target_bdev->bd_dev;
+        }
+
+        devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC);
+        if (!devinfo) {
+                printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n");
+                return -ENOMEM;
+        }
+
+        devinfo->bdev = toi_file_target_bdev;
+        devinfo->allocator = &toi_fileops;
+        devinfo->allocator_index = 0;
+
+        fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+        if (fs_info && !IS_ERR(fs_info)) {
+                memcpy(devinfo->uuid, &fs_info->uuid, 16);
+                free_fs_info(fs_info);
+        } else
+                result = (int) PTR_ERR(fs_info);
+
+        /* Unlike swap code, only complain if fs_info_from_block_dev returned
+         * -ENOMEM. The 'file' might be a full partition, so might validly not
+         * have an identifiable type, UUID etc.
+         */
+        if (result)
+                printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n",
+                                result);
+        devinfo->dev_t = toi_file_dev_t;
+        devinfo->prio = file_target_priority;
+        devinfo->bmap_shift = target_inode->i_blkbits - 9;
+        devinfo->blocks_per_page =
+                (1 << (PAGE_SHIFT - target_inode->i_blkbits));
+        sprintf(devinfo->name, "file %s", toi_file_target);
+        file_chain = devinfo;
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap "
+                        "shift is %d. Blocks per page %d.",
+                        devinfo->dev_t, devinfo->prio, devinfo->bmap_shift,
+                        devinfo->blocks_per_page);
+
+        /* Keep one aside for the signature */
+        pages_available = get_usable_pages(devinfo) - 1;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu "
+                        "pages.", pages_available);
+
+        toi_bio_ops.register_storage(devinfo);
+        return 0;
+}
+
+static unsigned long toi_file_storage_available(void)
+{
+        return pages_available;
+}
+
+static int toi_file_allocate_storage(struct toi_bdev_info *chain,
+                unsigned long request)
+{
+        unsigned long available = pages_available - pages_allocated;
+        unsigned long to_add = min(available, request);
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated "
+                "is %lu. Allocating %lu pages from file.",
+                pages_available, pages_allocated, to_add);
+        pages_allocated += to_add;
+
+        return to_add;
+}
+
+/**
+ * __populate_block_list - add an extent to the chain
+ * @chain:        Chain to which the extent should be added
+ * @min:        Start of the extent (first physical block = sector)
+ * @max:        End of the extent (last physical block = sector)
+ *
+ * If TOI_TEST_BIO is set, print a debug message, outputting the min and max
+ * fs block numbers.
+ **/
+static int __populate_block_list(struct toi_bdev_info *chain, int min, int max)
+{
+        if (test_action_state(TOI_TEST_BIO))
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.",
+                        min << chain->bmap_shift,
+                        ((max + 1) << chain->bmap_shift) - 1);
+
+        return toi_add_to_extent_chain(&chain->blocks, min, max);
+}
+
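+/*
+ * Build the chain of on-disk block extents covering the pages allocated for
+ * the image. For a regular file we walk its pages, keep the first contiguous
+ * page aside for the signature and merge runs of consecutive sectors into
+ * extents.
+ */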
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+        int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0;
+        unsigned long pages_mapped = 0;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks.");
+
+        if (chain->blocks.first)
+                toi_put_extent_chain(&chain->blocks);
+
+        if (!target_is_normal_file()) {
+                result = (pages_available > 0) ?
+                        __populate_block_list(chain, chain->blocks_per_page,
+                                (pages_allocated + 1) *
+                                chain->blocks_per_page - 1) : 0;
+                return result;
+        }
+
+        /*
+         * FIXME: We are assuming the first page is contiguous. Is that
+         * assumption always right?
+         */
+
+        for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
+                sector_t new_sector;
+
+                if (!has_contiguous_blocks(chain, i))
+                        continue;
+
+                if (!have_sig_page) {
+                        have_sig_page = 1;
+                        sig_page_index = i;
+                        continue;
+                }
+
+                pages_mapped++;
+
+                /* Ignore first page - it has the header */
+                if (pages_mapped == 1)
+                        continue;
+
+                new_sector = bmap(target_inode, (i * chain->blocks_per_page));
+
+                /*
+                 * I'd love to be able to fill in holes and resize
+                 * files, but not yet...
+                 */
+
+                if (new_sector == extent_max + 1)
+                        extent_max += chain->blocks_per_page;
+                else {
+                        if (extent_min > -1) {
+                                result = __populate_block_list(chain,
+                                                extent_min, extent_max);
+                                if (result)
+                                        return result;
+                        }
+
+                        extent_min = new_sector;
+                        extent_max = extent_min +
+                                chain->blocks_per_page - 1;
+                }
+
+                if (pages_mapped == pages_allocated)
+                        break;
+        }
+
+        if (extent_min > -1) {
+                result = __populate_block_list(chain, extent_min, extent_max);
+                if (result)
+                        return result;
+        }
+
+        return 0;
+}
+
+static void toi_file_free_storage(struct toi_bdev_info *chain)
+{
+        pages_allocated = 0;
+        file_chain = NULL;
+}
+
+/**
+ * toi_file_print_debug_stats - print debug info
+ * @buffer:        Buffer to data to populate
+ * @size:        Size of the buffer
+ **/
+static int toi_file_print_debug_stats(char *buffer, int size)
+{
+        int len = scnprintf(buffer, size, "- File Allocator active.\n");
+
+        len += scnprintf(buffer+len, size-len, "  Storage available for "
+                        "image: %lu pages.\n", pages_available);
+
+        return len;
+}
+
+static void toi_file_cleanup(int finishing_cycle)
+{
+        if (toi_file_target_bdev) {
+                if (target_claim) {
+                        blkdev_put(toi_file_target_bdev, FMODE_WRITE | FMODE_READ);
+                        target_claim = 0;
+                }
+
+                if (used_devt) {
+                        blkdev_put(toi_file_target_bdev,
+                                        FMODE_READ | FMODE_NDELAY);
+                        used_devt = 0;
+                }
+                toi_file_target_bdev = NULL;
+                target_inode = NULL;
+        }
+
+        if (target_file) {
+                filp_close(target_file, NULL);
+                target_file = NULL;
+        }
+
+        pages_available = 0;
+}
+
+/**
+ * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target
+ *
+ * Test whether the target file is valid for hibernating.
+ **/
+static void test_toi_file_target(void)
+{
+        int result = toi_file_register_storage();
+        sector_t sector;
+        char buf[50];
+        struct fs_info *fs_info;
+
+        if (result || !file_chain)
+                return;
+
+        /* This doesn't mean we're in business. Is any storage available? */
+        if (!pages_available)
+                goto out;
+
+        toi_file_allocate_storage(file_chain, 1);
+        result = get_main_pool_phys_params(file_chain);
+        if (result)
+                goto out;
+
+
+        sector = bmap(target_inode, sig_page_index *
+                        file_chain->blocks_per_page) << file_chain->bmap_shift;
+
+        /* Use the uuid, or the dev_t if that fails */
+        fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+        if (!fs_info || IS_ERR(fs_info)) {
+                bdevname(toi_file_target_bdev, buf);
+                sprintf(resume_file, "/dev/%s:%llu", buf,
+                                (unsigned long long) sector);
+        } else {
+                int i;
+                hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0);
+
+                /* Remove the spaces */
+                for (i = 1; i < 16; i++) {
+                        buf[2 * i] = buf[3 * i];
+                        buf[2 * i + 1] = buf[3 * i + 1];
+                }
+                buf[32] = 0;
+                sprintf(resume_file, "UUID=%s:0x%llx", buf,
+                                (unsigned long long) sector);
+                free_fs_info(fs_info);
+        }
+
+        toi_attempt_to_parse_resume_device(0);
+out:
+        toi_file_free_storage(file_chain);
+        toi_bio_ops.free_storage();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
+                SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
+        SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL),
+        SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095,
+                        4096, 0, NULL),
+};
+
+static struct toi_bio_allocator_ops toi_bio_fileops = {
+        .register_storage                        = toi_file_register_storage,
+        .storage_available                        = toi_file_storage_available,
+        .allocate_storage                        = toi_file_allocate_storage,
+        .bmap                                        = get_main_pool_phys_params,
+        .free_storage                                = toi_file_free_storage,
+};
+
+static struct toi_module_ops toi_fileops = {
+        .type                                        = BIO_ALLOCATOR_MODULE,
+        .name                                        = "file storage",
+        .directory                                = "file",
+        .module                                        = THIS_MODULE,
+        .print_debug_info                        = toi_file_print_debug_stats,
+        .cleanup                                = toi_file_cleanup,
+        .bio_allocator_ops                        = &toi_bio_fileops,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_file_load(void)
+{
+        return toi_register_module(&toi_fileops);
+}
+
+late_initcall(toi_file_load);
diff -Nur linux-4.2/kernel/power/tuxonice_highlevel.c linux-4.2-pck/kernel/power/tuxonice_highlevel.c
--- linux-4.2/kernel/power/tuxonice_highlevel.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_highlevel.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,1413 @@
+/*
+ * kernel/power/tuxonice_highlevel.c
+ */
+/** \mainpage TuxOnIce.
+ *
+ * TuxOnIce provides support for saving and restoring an image of
+ * system memory to an arbitrary storage device, either on the local computer,
+ * or across some network. The support is entirely OS based, so TuxOnIce
+ * works without requiring BIOS, APM or ACPI support. The vast majority of the
+ * code is also architecture independent, so it should be very easy to port
+ * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
+ * and preemption. Initramfses and initrds are also supported.
+ *
+ * TuxOnIce uses a modular design, in which the method of storing the image is
+ * completely abstracted from the core code, as are transformations on the data
+ * such as compression and/or encryption (multiple 'modules' can be used to
+ * provide arbitrary combinations of functionality). The user interface is also
+ * modular, so that arbitrarily simple or complex interfaces can be used to
+ * provide anything from debugging information through to eye candy.
+ *
+ * \section Copyright
+ *
+ * TuxOnIce is released under the GPLv2.
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)<BR>
+ *
+ * \section Credits
+ *
+ * Nigel would like to thank the following people for their work:
+ *
+ * Bernard Blackham <bernard@blackham.com.au><BR>
+ * Web page & Wiki administration, some coding. A person without whom
+ * TuxOnIce would not be where it is.
+ *
+ * Michael Frank <mhf@linuxmail.org><BR>
+ * Extensive testing and help with improving stability. I was constantly
+ * amazed by the quality and quantity of Michael's help.
+ *
+ * Pavel Machek <pavel@ucw.cz><BR>
+ * Modifications, defectiveness pointing, being with Gabor at the very
+ * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
+ * 2.5.17. Even though Pavel and I disagree on the direction suspend to
+ * disk should take, I appreciate the valuable work he did in helping Gabor
+ * get the concept working.
+ *
+ * ..and of course the myriads of TuxOnIce users who have helped diagnose
+ * and fix bugs, made suggestions on how to improve the code, proofread
+ * documentation, and donated time and money.
+ *
+ * Thanks also to corporate sponsors:
+ *
+ * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!).
+ *
+ * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
+ * allowed him to work on TuxOnIce and PM related issues on company time.
+ *
+ * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
+ * 2003 to Jan 2004.
+ *
+ * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
+ * maintenance of SMP and Highmem support.
+ *
+ * <B>OSDL.</B> Provided access to various hardware configurations and made
+ * occasional small donations to the project.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+#include <generated/utsrelease.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/writeback.h>
+#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
+#include <linux/bio.h>
+#include <linux/kgdb.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_cluster.h"
+
+/*! Pageset metadata. */
+struct pagedir pagedir2 = {2};
+
+static mm_segment_t oldfs;
+static DEFINE_MUTEX(tuxonice_in_use);
+static int block_dump_save;
+
+int toi_trace_index;
+
+/* Binary signature if an image is present */
+char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
+
+unsigned long boot_kernel_data_buffer;
+
+static char *result_strings[] = {
+        "Hibernation was aborted",
+        "The user requested that we cancel the hibernation",
+        "No storage was available",
+        "Insufficient storage was available",
+        "Freezing filesystems and/or tasks failed",
+        "A pre-existing image was used",
+        "We would free memory, but image size limit doesn't allow this",
+        "Unable to free enough memory to hibernate",
+        "Unable to obtain the Power Management Semaphore",
+        "A device suspend/resume returned an error",
+        "A system device suspend/resume returned an error",
+        "The extra pages allowance is too small",
+        "We were unable to successfully prepare an image",
+        "TuxOnIce module initialisation failed",
+        "TuxOnIce module cleanup failed",
+        "I/O errors were encountered",
+        "Ran out of memory",
+        "An error was encountered while reading the image",
+        "Platform preparation failed",
+        "CPU Hotplugging failed",
+        "Architecture specific preparation failed",
+        "Pages needed resaving, but we were told to abort if this happens",
+        "We can't hibernate at the moment (invalid resume= or filewriter "
+                "target?)",
+        "A hibernation preparation notifier chain member cancelled the "
+                "hibernation",
+        "Pre-snapshot preparation failed",
+        "Pre-restore preparation failed",
+        "Failed to disable usermode helpers",
+        "Can't resume from alternate image",
+        "Header reservation too small",
+        "Device Power Management Preparation failed",
+};
+
+/**
+ * toi_finish_anything - cleanup after doing anything
+ * @hibernate_or_resume:        Whether finishing a cycle or attempt at
+ *                                resuming.
+ *
+ * This is our basic clean-up routine, matching start_anything below. We
+ * call cleanup routines, drop module references and restore process fs and
+ * cpus allowed masks, together with the global block_dump variable's value.
+ **/
+void toi_finish_anything(int hibernate_or_resume)
+{
+        toi_running = 0;
+        toi_cleanup_modules(hibernate_or_resume);
+        toi_put_modules();
+        if (hibernate_or_resume) {
+                block_dump = block_dump_save;
+                set_cpus_allowed_ptr(current, cpu_all_mask);
+                toi_alloc_print_debug_stats();
+                atomic_inc(&snapshot_device_available);
+                unlock_system_sleep();
+        }
+
+        set_fs(oldfs);
+        mutex_unlock(&tuxonice_in_use);
+}
+
+/**
+ * toi_start_anything - basic initialisation for TuxOnIce
+ * @hibernate_or_resume:        Whether starting a cycle or an attempt at resuming.
+ *
+ * Our basic initialisation routine. Take references on modules, use the
+ * kernel segment, recheck resume= if no active allocator is set, initialise
+ * modules, save and reset block_dump and ensure we're running on CPU0.
+ **/
+int toi_start_anything(int hibernate_or_resume)
+{
+        mutex_lock(&tuxonice_in_use);
+
+        oldfs = get_fs();
+        set_fs(KERNEL_DS);
+
+        toi_trace_index = 0;
+
+        if (hibernate_or_resume) {
+                lock_system_sleep();
+
+                if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+                        goto snapshotdevice_unavailable;
+        }
+
+        if (hibernate_or_resume == SYSFS_HIBERNATE)
+                toi_print_modules();
+
+        if (toi_get_modules()) {
+                printk(KERN_INFO "TuxOnIce: Get modules failed!\n");
+                goto prehibernate_err;
+        }
+
+        if (hibernate_or_resume) {
+                block_dump_save = block_dump;
+                block_dump = 0;
+                set_cpus_allowed_ptr(current,
+                                cpumask_of(cpumask_first(cpu_online_mask)));
+        }
+
+        if (toi_initialise_modules_early(hibernate_or_resume))
+                goto early_init_err;
+
+        if (!toiActiveAllocator)
+                toi_attempt_to_parse_resume_device(!hibernate_or_resume);
+
+        if (!toi_initialise_modules_late(hibernate_or_resume)) {
+                toi_running = 1; /* For the swsusp code we use :< */
+                return 0;
+        }
+
+        toi_cleanup_modules(hibernate_or_resume);
+early_init_err:
+        if (hibernate_or_resume) {
+                block_dump_save = block_dump;
+                set_cpus_allowed_ptr(current, cpu_all_mask);
+        }
+        toi_put_modules();
+prehibernate_err:
+        if (hibernate_or_resume)
+                atomic_inc(&snapshot_device_available);
+snapshotdevice_unavailable:
+        if (hibernate_or_resume)
+                mutex_unlock(&pm_mutex);
+        set_fs(oldfs);
+        mutex_unlock(&tuxonice_in_use);
+        return -EBUSY;
+}
+
+/*
+ * Nosave page tracking.
+ *
+ * Here rather than in prepare_image because we want to do it once only at the
+ * start of a cycle.
+ */
+
+/**
+ * mark_nosave_pages - set up our Nosave bitmap
+ *
+ * Build a bitmap of Nosave pages from the list. The bitmap allows faster
+ * use when preparing the image.
+ **/
+static void mark_nosave_pages(void)
+{
+        struct nosave_region *region;
+
+        list_for_each_entry(region, &nosave_regions, list) {
+                unsigned long pfn;
+
+                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+                        if (pfn_valid(pfn)) {
+                                SetPageNosave(pfn_to_page(pfn));
+                        }
+        }
+}
+
+/**
+ * allocate_bitmaps - allocate bitmaps used to record page states
+ *
+ * Allocate the bitmaps we use to record the various TuxOnIce related
+ * page states.
+ **/
+static int allocate_bitmaps(void)
+{
+        if (toi_alloc_bitmap(&pageset1_map) ||
+            toi_alloc_bitmap(&pageset1_copy_map) ||
+            toi_alloc_bitmap(&pageset2_map) ||
+            toi_alloc_bitmap(&io_map) ||
+            toi_alloc_bitmap(&nosave_map) ||
+            toi_alloc_bitmap(&free_map) ||
+            toi_alloc_bitmap(&compare_map) ||
+            toi_alloc_bitmap(&page_resave_map))
+                return 1;
+
+        return 0;
+}
+
+/**
+ * free_bitmaps - free the bitmaps used to record page states
+ *
+ * Free the bitmaps allocated above. It is not an error to call
+ * memory_bm_free on a bitmap that isn't currently allocated.
+ **/
+static void free_bitmaps(void)
+{
+        toi_free_bitmap(&pageset1_map);
+        toi_free_bitmap(&pageset1_copy_map);
+        toi_free_bitmap(&pageset2_map);
+        toi_free_bitmap(&io_map);
+        toi_free_bitmap(&nosave_map);
+        toi_free_bitmap(&free_map);
+        toi_free_bitmap(&compare_map);
+        toi_free_bitmap(&page_resave_map);
+}
+
+/**
+ * io_MB_per_second - return the number of MB/s read or written
+ * @write:        Whether to return the speed at which we wrote.
+ *
+ * Calculate the number of megabytes per second that were read or written.
+ **/
+static int io_MB_per_second(int write)
+{
+        return (toi_bkd.toi_io_time[write][1]) ?
+                MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
+                toi_bkd.toi_io_time[write][1] : 0;
+}
+
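+/*
+ * Append formatted text to 'buffer', advancing 'len' while keeping the total
+ * below 'count' bytes; used by the debug and page-state reports below.
+ */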
+#define SNPRINTF(a...)         do { len += scnprintf(((char *) buffer) + len, \
+                count - len - 1, ## a); } while (0)
+
+/**
+ * get_debug_info - fill a buffer with debugging information
+ * @buffer:        The buffer to be filled.
+ * @count:        The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_debug_info(const char *buffer, int count)
+{
+        int len = 0, i, first_result = 1;
+
+        SNPRINTF("TuxOnIce debugging info:\n");
+        SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
+        SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
+        SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
+        SNPRINTF("- Attempt number : %d\n", nr_hibernates);
+        SNPRINTF("- Parameters     : %ld %ld %ld %d %ld %ld\n",
+                        toi_result,
+                        toi_bkd.toi_action,
+                        toi_bkd.toi_debug_state,
+                        toi_bkd.toi_default_console_level,
+                        image_size_limit,
+                        toi_poweroff_method);
+        SNPRINTF("- Overall expected compression percentage: %d.\n",
+                        100 - toi_expected_compression_ratio());
+        len += toi_print_module_debug_info(((char *) buffer) + len,
+                        count - len - 1);
+        if (toi_bkd.toi_io_time[0][1]) {
+                if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
+                        SNPRINTF("- I/O speed: Write %ld KB/s",
+                          (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+                          toi_bkd.toi_io_time[0][1]));
+                        if (toi_bkd.toi_io_time[1][1])
+                                SNPRINTF(", Read %ld KB/s",
+                                  (KB((unsigned long)
+                                      toi_bkd.toi_io_time[1][0]) * HZ /
+                                  toi_bkd.toi_io_time[1][1]));
+                } else {
+                        SNPRINTF("- I/O speed: Write %ld MB/s",
+                         (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+                          toi_bkd.toi_io_time[0][1]));
+                        if (toi_bkd.toi_io_time[1][1])
+                                SNPRINTF(", Read %ld MB/s",
+                                 (MB((unsigned long)
+                                     toi_bkd.toi_io_time[1][0]) * HZ /
+                                  toi_bkd.toi_io_time[1][1]));
+                }
+                SNPRINTF(".\n");
+        } else
+                SNPRINTF("- No I/O speed stats available.\n");
+        SNPRINTF("- Extra pages    : %lu used/%lu.\n",
+                        extra_pd1_pages_used, extra_pd1_pages_allowance);
+
+        for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
+                if (test_result_state(i)) {
+                        SNPRINTF("%s: %s.\n", first_result ?
+                                        "- Result         " :
+                                        "                 ",
+                                        result_strings[i]);
+                        first_result = 0;
+                }
+        if (first_result)
+                SNPRINTF("- Result         : %s.\n", nr_hibernates ?
+                        "Succeeded" :
+                        "No hibernation attempts so far");
+        return len;
+}
+
+#ifdef CONFIG_TOI_INCREMENTAL
+/**
+ * get_toi_page_state - fill a buffer with page state information
+ * @buffer:        The buffer to be filled.
+ * @count:        The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_page_state(const char *buffer, int count)
+{
+    int free = 0, untracked = 0, dirty = 0, ro = 0, invalid = 0, other = 0, total = 0;
+    int len = 0;
+    struct zone *zone;
+    int allocated_bitmaps = 0;
+
+    set_cpus_allowed_ptr(current,
+            cpumask_of(cpumask_first(cpu_online_mask)));
+
+    if (!free_map) {
+        BUG_ON(toi_alloc_bitmap(&free_map));
+        allocated_bitmaps = 1;
+    }
+
+    toi_generate_free_page_map();
+
+    for_each_populated_zone(zone) {
+        unsigned long loop;
+
+        total += zone->spanned_pages;
+
+        for (loop = 0; loop < zone->spanned_pages; loop++) {
+            unsigned long pfn = zone->zone_start_pfn + loop;
+            struct page *page;
+            int chunk_size;
+
+            if (!pfn_valid(pfn)) {
+                continue;
+            }
+
+            chunk_size = toi_size_of_free_region(zone, pfn);
+            if (chunk_size) {
+                /*
+                 * If the page gets allocated, it will need
+                 * saving in an image.
+                 * Don't bother with explicitly removing any
+                 * RO protection applied below.
+                 * We'll SetPageTOI_Dirty(page) if/when it
+                 * gets allocated.
+                 */
+                free += chunk_size;
+                loop += chunk_size - 1;
+                continue;
+            }
+
+            page = pfn_to_page(pfn);
+
+            if (PageTOI_Untracked(page)) {
+                untracked++;
+            } else if (PageTOI_RO(page)) {
+                ro++;
+            } else if (PageTOI_Dirty(page)) {
+                dirty++;
+            } else {
+                printk("Page %lu state 'other'.\n", pfn);
+                other++;
+            }
+        }
+    }
+
+    if (allocated_bitmaps) {
+        toi_free_bitmap(&free_map);
+    }
+
+    set_cpus_allowed_ptr(current, cpu_all_mask);
+
+    SNPRINTF("TuxOnIce page breakdown:\n");
+    SNPRINTF("- Free           : %d\n", free);
+    SNPRINTF("- Untracked      : %d\n", untracked);
+    SNPRINTF("- Read only      : %d\n", ro);
+    SNPRINTF("- Dirty          : %d\n", dirty);
+    SNPRINTF("- Other          : %d\n", other);
+    SNPRINTF("- Invalid        : %d\n", invalid);
+    SNPRINTF("- Total          : %d\n", total);
+    return len;
+}
+#endif
+
+/**
+ * do_cleanup - cleanup after attempting to hibernate or resume
+ * @get_debug_info:        Whether to allocate and return debugging info.
+ *
+ * Cleanup after attempting to hibernate or resume, possibly getting
+ * debugging info as we do so.
+ **/
+static void do_cleanup(int get_debug_info, int restarting)
+{
+        int i = 0;
+        char *buffer = NULL;
+
+        trap_non_toi_io = 0;
+
+        if (get_debug_info)
+                toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
+
+        free_checksum_pages();
+
+        toi_cbw_restore();
+        toi_free_cbw_data();
+
+        if (get_debug_info)
+                buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
+
+        if (buffer)
+                i = get_toi_debug_info(buffer, PAGE_SIZE);
+
+        toi_free_extra_pagedir_memory();
+
+        pagedir1.size = 0;
+        pagedir2.size = 0;
+        set_highmem_size(pagedir1, 0);
+        set_highmem_size(pagedir2, 0);
+
+        if (boot_kernel_data_buffer) {
+                if (!test_toi_state(TOI_BOOT_KERNEL))
+                        toi_free_page(37, boot_kernel_data_buffer);
+                boot_kernel_data_buffer = 0;
+        }
+
+        if (test_toi_state(TOI_DEVICE_HOTPLUG_LOCKED)) {
+                unlock_device_hotplug();
+                clear_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+        }
+
+        clear_toi_state(TOI_BOOT_KERNEL);
+        if (current->flags & PF_SUSPEND_TASK)
+                thaw_processes();
+
+        if (!restarting)
+                toi_stop_other_threads();
+
+        if (toi_keeping_image &&
+            !test_result_state(TOI_ABORTED)) {
+                toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+                        "TuxOnIce: Not invalidating the image due "
+                        "to Keep Image or Incremental Image being enabled.");
+                set_result_state(TOI_KEPT_IMAGE);
+
+                /*
+                 * For an incremental image, free unused storage so
+                 * swap (if any) can be used for normal system operation,
+                 * if so desired.
+                 */
+
+                toiActiveAllocator->free_unused_storage();
+        } else
+                if (toiActiveAllocator)
+                        toiActiveAllocator->remove_image();
+
+        free_bitmaps();
+        usermodehelper_enable();
+
+        if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
+                pm_notifier_call_chain(PM_POST_HIBERNATION);
+                clear_toi_state(TOI_NOTIFIERS_PREPARE);
+        }
+
+        if (buffer && i) {
+                /* Printk can only handle 1023 bytes, including
+                 * its level mangling. */
+                for (i = 0; i < 3; i++)
+                        printk(KERN_ERR "%s", buffer + (1023 * i));
+                toi_free_page(20, (unsigned long) buffer);
+        }
+
+        if (!restarting)
+                toi_cleanup_console();
+
+        free_attention_list();
+
+        if (!restarting)
+                toi_deactivate_storage(0);
+
+        clear_toi_state(TOI_IGNORE_LOGLEVEL);
+        clear_toi_state(TOI_TRYING_TO_RESUME);
+        clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * check_still_keeping_image - we kept an image; check whether to reuse it.
+ *
+ * We enter this routine when we have kept an image. If the user has said they
+ * want to still keep it, all we need to do is power down. If powering down
+ * means hibernating to RAM and the power doesn't run out, we'll return 1.
+ * If we do power off properly or the battery runs out, we'll resume via the
+ * normal paths.
+ *
+ * If the user has said they want to remove the previously kept image, we
+ * remove it, and return 0. We'll then store a new image.
+ **/
+static int check_still_keeping_image(void)
+{
+    if (toi_keeping_image) {
+        if (!test_action_state(TOI_INCREMENTAL_IMAGE)) {
+            printk(KERN_INFO "Image already stored: powering down "
+                    "immediately.");
+            do_toi_step(STEP_HIBERNATE_POWERDOWN);
+            return 1;
+        }
+        /**
+         * Incremental image - need to write new part.
+         * We detect that we're writing an incremental image by looking
+         * at test_result_state(TOI_KEPT_IMAGE)
+         **/
+        return 0;
+    }
+
+    printk(KERN_INFO "Invalidating previous image.\n");
+    toiActiveAllocator->remove_image();
+
+    return 0;
+}
+
+/**
+ * toi_init - prepare to hibernate to disk
+ *
+ * Initialise variables & data structures, in preparation for
+ * hibernating to disk.
+ **/
+static int toi_init(int restarting)
+{
+        int result, i, j;
+
+        toi_result = 0;
+
+        printk(KERN_INFO "Initiating a hibernation cycle.\n");
+
+        nr_hibernates++;
+
+        for (i = 0; i < 2; i++)
+                for (j = 0; j < 2; j++)
+                        toi_bkd.toi_io_time[i][j] = 0;
+
+        if (!test_toi_state(TOI_CAN_HIBERNATE) ||
+            allocate_bitmaps())
+                return 1;
+
+        mark_nosave_pages();
+
+        if (!restarting)
+                toi_prepare_console();
+
+        result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+        if (result) {
+                set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
+                return 1;
+        }
+        set_toi_state(TOI_NOTIFIERS_PREPARE);
+
+        if (!restarting) {
+                printk(KERN_INFO "Starting other threads.\n");
+                toi_start_other_threads();
+        }
+
+        result = usermodehelper_disable();
+        if (result) {
+                printk(KERN_ERR "TuxOnIce: Failed to disable usermode "
+                                "helpers\n");
+                set_result_state(TOI_USERMODE_HELPERS_ERR);
+                return 1;
+        }
+
+        boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
+        if (!boot_kernel_data_buffer) {
+                printk(KERN_ERR "TuxOnIce: Failed to allocate "
+                                "boot_kernel_data_buffer.\n");
+                set_result_state(TOI_OUT_OF_MEMORY);
+                return 1;
+        }
+
+        toi_allocate_cbw_data();
+
+        return 0;
+}
+
+/**
+ * can_hibernate - perform basic 'Can we hibernate?' tests
+ *
+ * Perform basic tests that must pass if we're going to be able to hibernate:
+ * Can we get the pm_mutex? Is resume= valid (we need to know where to write
+ * the image header).
+ **/
+static int can_hibernate(void)
+{
+        if (!test_toi_state(TOI_CAN_HIBERNATE))
+                toi_attempt_to_parse_resume_device(0);
+
+        if (!test_toi_state(TOI_CAN_HIBERNATE)) {
+                printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
+                        "This may be because you haven't put something along "
+                        "the lines of\n\nresume=swap:/dev/hda1\n\n"
+                        "in lilo.conf or equivalent. (Where /dev/hda1 is your "
+                        "swap partition).\n");
+                set_abort_result(TOI_CANT_SUSPEND);
+                return 0;
+        }
+
+        if (strlen(alt_resume_param)) {
+                attempt_to_parse_alt_resume_param();
+
+                if (!strlen(alt_resume_param)) {
+                        printk(KERN_INFO "Alternate resume parameter now "
+                                        "invalid. Aborting.\n");
+                        set_abort_result(TOI_CANT_USE_ALT_RESUME);
+                        return 0;
+                }
+        }
+
+        return 1;
+}
+
+/**
+ * do_post_image_write - having written an image, figure out what to do next
+ *
+ * After writing an image, we might load an alternate image or power down.
+ * Powering down might involve hibernating to ram, in which case we also
+ * need to handle reloading pageset2.
+ **/
+static int do_post_image_write(void)
+{
+        /* If switching images fails, do normal powerdown */
+        if (alt_resume_param[0])
+                do_toi_step(STEP_RESUME_ALT_IMAGE);
+
+        toi_power_down();
+
+        barrier();
+        mb();
+        return 0;
+}
+
+/**
+ * __save_image - do the hard work of saving the image
+ *
+ * High level routine for getting the image saved. The key assumptions made
+ * are that processes have been frozen and sufficient memory is available.
+ *
+ * We also exit through here at resume time, coming back from toi_hibernate
+ * after the atomic restore. This is the reason for the toi_in_hibernate
+ * test.
+ **/
+static int __save_image(void)
+{
+        int temp_result, did_copy = 0;
+
+        toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
+
+        toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+                " - Final values: %d and %d.",
+                pagedir1.size, pagedir2.size);
+
+        toi_cond_pause(1, "About to write pagedir2.");
+
+        temp_result = write_pageset(&pagedir2);
+
+        if (temp_result == -1 || test_result_state(TOI_ABORTED))
+                return 1;
+
+        toi_cond_pause(1, "About to copy pageset 1.");
+
+        if (test_result_state(TOI_ABORTED))
+                return 1;
+
+        toi_deactivate_storage(1);
+
+        toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+        toi_in_hibernate = 1;
+
+        if (toi_go_atomic(PMSG_FREEZE, 1))
+                goto Failed;
+
+        temp_result = toi_hibernate();
+
+#ifdef CONFIG_KGDB
+        if (test_action_state(TOI_POST_RESUME_BREAKPOINT))
+                kgdb_breakpoint();
+#endif
+
+        if (!temp_result)
+                did_copy = 1;
+
+        /* We return here at resume time too! */
+        toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
+
+Failed:
+        if (toi_activate_storage(1))
+                panic("Failed to reactivate our storage.");
+
+        /* Resume time? */
+        if (!toi_in_hibernate) {
+                copyback_post();
+                return 0;
+        }
+
+        /* Nope. Hibernating. So, see if we can save the image... */
+
+        if (temp_result || test_result_state(TOI_ABORTED)) {
+                if (did_copy)
+                        goto abort_reloading_pagedir_two;
+                else
+                        return 1;
+        }
+
+        toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
+                        NULL);
+
+        if (test_result_state(TOI_ABORTED))
+                goto abort_reloading_pagedir_two;
+
+        toi_cond_pause(1, "About to write pageset1.");
+
+        toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1");
+
+        temp_result = write_pageset(&pagedir1);
+
+        /* We didn't overwrite any memory, so no reread needs to be done. */
+        if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+            test_action_state(TOI_TEST_BIO))
+                return 1;
+
+        if (temp_result == 1 || test_result_state(TOI_ABORTED))
+                goto abort_reloading_pagedir_two;
+
+        toi_cond_pause(1, "About to write header.");
+
+        if (test_result_state(TOI_ABORTED))
+                goto abort_reloading_pagedir_two;
+
+        temp_result = write_image_header();
+
+        if (!temp_result && !test_result_state(TOI_ABORTED))
+                return 0;
+
+abort_reloading_pagedir_two:
+        temp_result = read_pageset2(1);
+
+        /* If that failed, we're sunk. Panic! */
+        if (temp_result)
+                panic("Attempt to reload pagedir 2 while aborting "
+                                "a hibernate failed.");
+
+        return 1;
+}
+
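+/*
+ * Map or unmap (according to 'enable') every page recorded in pageset2_map,
+ * so that pageset2 pages are not accessible while the image is being saved.
+ */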
+static void map_ps2_pages(int enable)
+{
+        unsigned long pfn = 0;
+
+        memory_bm_position_reset(pageset2_map);
+        pfn = memory_bm_next_pfn(pageset2_map, 0);
+
+        while (pfn != BM_END_OF_MAP) {
+                struct page *page = pfn_to_page(pfn);
+                kernel_map_pages(page, 1, enable);
+                pfn = memory_bm_next_pfn(pageset2_map, 0);
+        }
+}
+
+/**
+ * do_save_image - save the image and handle the result
+ *
+ * Save the prepared image. If we fail or we're in the path returning
+ * from the atomic restore, cleanup.
+ **/
+static int do_save_image(void)
+{
+        int result;
+        map_ps2_pages(0);
+        result = __save_image();
+        map_ps2_pages(1);
+        return result;
+}
+
+/**
+ * do_prepare_image - try to prepare an image
+ *
+ * Seek to initialise and prepare an image to be saved. On failure,
+ * cleanup.
+ **/
+static int do_prepare_image(void)
+{
+        int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+
+        if (!restarting && toi_activate_storage(0))
+                return 1;
+
+        /*
+         * If we kept an image, are still keeping it and are hibernating to
+         * RAM (the non-incremental image case), we will return 1 after
+         * hibernating and resuming (provided the power doesn't run out). In
+         * that case, we skip directly to cleaning up and exiting.
+         */
+
+        if (!can_hibernate() ||
+            (test_result_state(TOI_KEPT_IMAGE) &&
+             check_still_keeping_image()))
+                return 1;
+
+        if (toi_init(restarting) || toi_prepare_image() ||
+                        test_result_state(TOI_ABORTED))
+                return 1;
+
+        trap_non_toi_io = 1;
+
+        return 0;
+}
+
+/**
+ * do_check_can_resume - find out whether an image has been stored
+ *
+ * Read whether an image exists. We use the same routine as the
+ * image_exists sysfs entry, and just look to see whether the
+ * first character in the resulting buffer is a '1'.
+ **/
+int do_check_can_resume(void)
+{
+        int result = -1;
+
+        if (toi_activate_storage(0))
+                return -1;
+
+        if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+                toi_attempt_to_parse_resume_device(1);
+
+        if (toiActiveAllocator)
+                result = toiActiveAllocator->image_exists(1);
+
+        toi_deactivate_storage(0);
+        return result;
+}
+
+/**
+ * do_load_atomic_copy - load the first part of an image, if it exists
+ *
+ * Check whether we have an image. If one exists, do sanity checking
+ * (possibly invalidating the image or even rebooting if the user
+ * requests that) before loading it into memory in preparation for the
+ * atomic restore.
+ *
+ * If and only if we have an image loaded and ready to restore, we return 1.
+ **/
+static int do_load_atomic_copy(void)
+{
+        int read_image_result = 0;
+
+        if (sizeof(swp_entry_t) != sizeof(long)) {
+                printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
+                        " of long. Please report this!\n");
+                return 1;
+        }
+
+        if (!resume_file[0])
+                printk(KERN_WARNING "TuxOnIce: "
+                        "You need to use a resume= command line parameter to "
+                        "tell TuxOnIce where to look for an image.\n");
+
+        toi_activate_storage(0);
+
+        if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
+                !toi_attempt_to_parse_resume_device(0)) {
+                /*
+                 * Without a usable storage device we can do nothing -
+                 * even if noresume is given
+                 */
+
+                if (!toiNumAllocators)
+                        printk(KERN_ALERT "TuxOnIce: "
+                          "No storage allocators have been registered.\n");
+                else
+                        printk(KERN_ALERT "TuxOnIce: "
+                                "Missing or invalid storage location "
+                                "(resume= parameter). Please correct and "
+                                "rerun lilo (or equivalent) before "
+                                "hibernating.\n");
+                toi_deactivate_storage(0);
+                return 1;
+        }
+
+        if (allocate_bitmaps())
+                return 1;
+
+        read_image_result = read_pageset1(); /* non fatal error ignored */
+
+        if (test_toi_state(TOI_NORESUME_SPECIFIED))
+                clear_toi_state(TOI_NORESUME_SPECIFIED);
+
+        toi_deactivate_storage(0);
+
+        if (read_image_result)
+                return 1;
+
+        return 0;
+}
+
+/**
+ * prepare_restore_load_alt_image - save & restore alt image variables
+ *
+ * Save and restore the pageset1 maps, when loading an alternate image.
+ **/
+static void prepare_restore_load_alt_image(int prepare)
+{
+        static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save;
+
+        if (prepare) {
+                pageset1_map_save = pageset1_map;
+                pageset1_map = NULL;
+                pageset1_copy_map_save = pageset1_copy_map;
+                pageset1_copy_map = NULL;
+                set_toi_state(TOI_LOADING_ALT_IMAGE);
+                toi_reset_alt_image_pageset2_pfn();
+        } else {
+                toi_free_bitmap(&pageset1_map);
+                pageset1_map = pageset1_map_save;
+                toi_free_bitmap(&pageset1_copy_map);
+                pageset1_copy_map = pageset1_copy_map_save;
+                clear_toi_state(TOI_NOW_RESUMING);
+                clear_toi_state(TOI_LOADING_ALT_IMAGE);
+        }
+}
+
+/**
+ * do_toi_step - perform a step in hibernating or resuming
+ *
+ * Perform a step in hibernating or resuming an image. This abstraction
+ * is in preparation for implementing cluster support, and perhaps replacing
+ * uswsusp too (haven't looked whether that's possible yet).
+ **/
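+/*
+ * For reference, a resume cycle runs the steps in the order
+ * STEP_RESUME_CAN_RESUME, STEP_RESUME_LOAD_PS1 and STEP_RESUME_DO_RESTORE
+ * (see toi_try_resume() below), while a hibernation cycle runs
+ * STEP_HIBERNATE_PREPARE_IMAGE, STEP_HIBERNATE_SAVE_IMAGE and
+ * STEP_HIBERNATE_POWERDOWN (see toi_try_hibernate()).
+ */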
+int do_toi_step(int step)
+{
+        switch (step) {
+        case STEP_HIBERNATE_PREPARE_IMAGE:
+                return do_prepare_image();
+        case STEP_HIBERNATE_SAVE_IMAGE:
+                return do_save_image();
+        case STEP_HIBERNATE_POWERDOWN:
+                return do_post_image_write();
+        case STEP_RESUME_CAN_RESUME:
+                return do_check_can_resume();
+        case STEP_RESUME_LOAD_PS1:
+                return do_load_atomic_copy();
+        case STEP_RESUME_DO_RESTORE:
+                /*
+                 * If we succeed, this doesn't return.
+                 * Instead, we return from do_save_image() in the
+                 * hibernated kernel.
+                 */
+                return toi_atomic_restore();
+        case STEP_RESUME_ALT_IMAGE:
+                printk(KERN_INFO "Trying to resume alternate image.\n");
+                toi_in_hibernate = 0;
+                save_restore_alt_param(SAVE, NOQUIET);
+                prepare_restore_load_alt_image(1);
+                if (!do_check_can_resume()) {
+                        printk(KERN_INFO "Nothing to resume from.\n");
+                        goto out;
+                }
+                if (!do_load_atomic_copy())
+                        toi_atomic_restore();
+
+                printk(KERN_INFO "Failed to load image.\n");
+out:
+                prepare_restore_load_alt_image(0);
+                save_restore_alt_param(RESTORE, NOQUIET);
+                break;
+        case STEP_CLEANUP:
+                do_cleanup(1, 0);
+                break;
+        case STEP_QUIET_CLEANUP:
+                do_cleanup(0, 0);
+                break;
+        }
+
+        return 0;
+}
+
+/* -- Functions for kickstarting a hibernate or resume --- */
+
+/**
+ * toi_try_resume - try to do the steps in resuming
+ *
+ * Check if we have an image and if so try to resume. Clear the status
+ * flags too.
+ **/
+void toi_try_resume(void)
+{
+        set_toi_state(TOI_TRYING_TO_RESUME);
+        resume_attempted = 1;
+
+        current->flags |= PF_MEMALLOC;
+        toi_start_other_threads();
+
+        if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
+                        !do_toi_step(STEP_RESUME_LOAD_PS1))
+                do_toi_step(STEP_RESUME_DO_RESTORE);
+
+        toi_stop_other_threads();
+        do_cleanup(0, 0);
+
+        current->flags &= ~PF_MEMALLOC;
+
+        clear_toi_state(TOI_IGNORE_LOGLEVEL);
+        clear_toi_state(TOI_TRYING_TO_RESUME);
+        clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume
+ *
+ * Wrapper for when toi_try_resume is called from the swsusp resume path,
+ * rather than from echo > /sys/power/tuxonice/do_resume.
+ **/
+static void toi_sys_power_disk_try_resume(void)
+{
+        resume_attempted = 1;
+
+        /*
+         * There's a comment in kernel/power/disk.c that indicates
+         * we should be able to use mutex_lock_nested below. That
+         * doesn't seem to cut it, though, so let's just turn lockdep
+         * off for now.
+         */
+        lockdep_off();
+
+        if (toi_start_anything(SYSFS_RESUMING))
+                goto out;
+
+        toi_try_resume();
+
+        /*
+         * For initramfs, we have to clear the boot time
+         * flag after trying to resume
+         */
+        clear_toi_state(TOI_BOOT_TIME);
+
+        toi_finish_anything(SYSFS_RESUMING);
+out:
+        lockdep_on();
+}
+
+/**
+ * toi_try_hibernate - try to start a hibernation cycle
+ *
+ * Start a hibernation cycle, coming in from either
+ * echo > /sys/power/tuxonice/do_suspend
+ *
+ * or
+ *
+ * echo disk > /sys/power/state
+ *
+ * In the latter case, we come in without pm_sem taken; in the
+ * former, it has been taken.
+ **/
+int toi_try_hibernate(void)
+{
+        int result = 0, sys_power_disk = 0, retries = 0;
+
+        if (!mutex_is_locked(&tuxonice_in_use)) {
+                /* Came in via /sys/power/disk */
+                if (toi_start_anything(SYSFS_HIBERNATING))
+                        return -EBUSY;
+                sys_power_disk = 1;
+        }
+
+        current->flags |= PF_MEMALLOC;
+
+        if (test_toi_state(TOI_CLUSTER_MODE)) {
+                toi_initiate_cluster_hibernate();
+                goto out;
+        }
+
+prepare:
+        result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+
+        if (result)
+                goto out;
+
+        if (test_action_state(TOI_FREEZER_TEST))
+                goto out_restore_gfp_mask;
+
+        result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+
+        if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) {
+                if (retries < 2) {
+                        do_cleanup(0, 1);
+                        retries++;
+                        clear_result_state(TOI_ABORTED);
+                        extra_pd1_pages_allowance = extra_pd1_pages_used + 500;
+                        printk(KERN_INFO "Automatically adjusting the extra"
+                                " pages allowance to %ld and restarting.\n",
+                                extra_pd1_pages_allowance);
+                        pm_restore_gfp_mask();
+                        goto prepare;
+                }
+
+                printk(KERN_INFO "Adjusted extra pages allowance twice and "
+                        "still couldn't hibernate successfully. Giving up.\n");
+        }
+
+        /* This code runs at resume time too! */
+        if (!result && toi_in_hibernate)
+                result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+
+out_restore_gfp_mask:
+        pm_restore_gfp_mask();
+out:
+        do_cleanup(1, 0);
+        current->flags &= ~PF_MEMALLOC;
+
+        if (sys_power_disk)
+                toi_finish_anything(SYSFS_HIBERNATING);
+
+        return result;
+}
+
+/*
+ * channel_no: If !0, -c <channel_no> is added to args (userui).
+ */
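+/*
+ * Example invocation (hypothetical path and values, for illustration only):
+ *
+ *   toi_launch_userspace_program("/sbin/tuxoniceui_text", 1, UMH_WAIT_PROC, 0);
+ *
+ * would run the userspace UI helper on channel 1 and wait for it to exit.
+ */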
+int toi_launch_userspace_program(char *command, int channel_no,
+                int wait, int debug)
+{
+        int retval;
+        static char *envp[] = {
+                        "HOME=/",
+                        "TERM=linux",
+                        "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                        NULL };
+        static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+                };
+        char *channel = NULL;
+        int arg = 0, size;
+        char test_read[255];
+        char *orig_posn = command;
+
+        if (!strlen(orig_posn))
+                return 1;
+
+        if (channel_no) {
+                channel = toi_kzalloc(4, 6, GFP_KERNEL);
+                if (!channel) {
+                        printk(KERN_INFO "Failed to allocate memory in "
+                                "preparing to launch userspace program.\n");
+                        return 1;
+                }
+        }
+
+        /* Up to 6 args supported */
+        while (arg < 6) {
+                sscanf(orig_posn, "%s", test_read);
+                size = strlen(test_read);
+                if (!(size))
+                        break;
+                argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
+                if (!argv[arg])
+                        break;
+                strcpy(argv[arg], test_read);
+                orig_posn += size + 1;
+                *test_read = 0;
+                arg++;
+        }
+
+        if (channel_no) {
+                sprintf(channel, "-c%d", channel_no);
+                argv[arg] = channel;
+        } else
+                arg--;
+
+        if (debug) {
+                argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
+                strcpy(argv[arg], "--debug");
+        }
+
+        retval = call_usermodehelper(argv[0], argv, envp, wait);
+
+        /*
+         * If the program reports an error, retval = 256. Don't complain
+         * about that here.
+         */
+        if (retval && retval != 256)
+                printk(KERN_ERR "Failed to launch userspace program '%s': "
+                                "Error %d\n", command, retval);
+
+        {
+                int i;
+                for (i = 0; i < arg; i++)
+                        if (argv[i] && argv[i] != channel)
+                                toi_kfree(5, argv[i], sizeof(*argv[i]));
+        }
+
+        toi_kfree(4, channel, sizeof(*channel));
+
+        return retval;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
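+/*
+ * For illustration (assuming the usual /sys/power/tuxonice location for
+ * these entries), they are driven from userspace along the lines of:
+ *
+ *   echo /dev/sdXN > /sys/power/tuxonice/resume
+ *   cat /sys/power/tuxonice/image_exists
+ *   echo 2000 > /sys/power/tuxonice/extra_pages_allowance
+ */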
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
+                        &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
+        SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
+                        image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
+        SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
+                        SYSFS_NEEDS_SM_FOR_WRITE,
+                        attempt_to_parse_resume_device2),
+        SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
+                        SYSFS_NEEDS_SM_FOR_WRITE,
+                        attempt_to_parse_alt_resume_param),
+        SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
+                        NULL),
+        SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_IGNORE_ROOTFS, 0),
+        SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2,
+                        INT_MAX, 0),
+        SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
+        SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_NO_MULTITHREADED_IO, 0),
+        SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_NO_FLUSHER_THREAD, 0),
+        SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_PAGESET2_FULL, 0),
+        SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
+        SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_REPLACE_SWSUSP, 0),
+        SYSFS_STRING("resume_commandline", SYSFS_RW,
+                        toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
+                        NULL),
+        SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
+        SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_FREEZER_TEST, 0),
+        SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
+        SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_TEST_FILTER_SPEED, 0),
+        SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_NO_PAGESET2, 0),
+        SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_NO_PS2_IF_UNNEEDED, 0),
+        SYSFS_STRING("binary_signature", SYSFS_READONLY,
+                        tuxonice_signature, 9, 0, NULL),
+        SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0,
+                        NULL),
+#ifdef CONFIG_KGDB
+        SYSFS_BIT("post_resume_breakpoint", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_POST_RESUME_BREAKPOINT, 0),
+#endif
+        SYSFS_BIT("no_readahead", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_NO_READAHEAD, 0),
+        SYSFS_BIT("trace_debug_on", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_TRACE_DEBUG_ON, 0),
+#ifdef CONFIG_TOI_KEEP_IMAGE
+        SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
+                        0),
+#endif
+#ifdef CONFIG_TOI_INCREMENTAL
+        SYSFS_CUSTOM("pagestate", SYSFS_READONLY, get_toi_page_state, NULL, 0,
+                        NULL),
+        SYSFS_BIT("incremental", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_INCREMENTAL_IMAGE, 1),
+#endif
+};
+
+static struct toi_core_fns my_fns = {
+        .get_nonconflicting_page = __toi_get_nonconflicting_page,
+        .post_context_save = __toi_post_context_save,
+        .try_hibernate = toi_try_hibernate,
+        .try_resume = toi_sys_power_disk_try_resume,
+};
+
+/**
+ * core_load - initialisation of TuxOnIce core
+ *
+ * Initialise the core, beginning with sysfs. Checksum and so on are part of
+ * the core, but have their own initialisation routines because they either
+ * aren't compiled in all the time or have their own subdirectories.
+ **/
+static __init int core_load(void)
+{
+        int i,
+            numfiles = ARRAY_SIZE(sysfs_params);
+
+        printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
+                        " (http://tuxonice.net)\n");
+
+        if (!hibernation_available()) {
+                printk(KERN_INFO "TuxOnIce disabled due to request for "
+                        "hibernation to be disabled in this kernel.\n");
+                return 1;
+        }
+
+        if (toi_sysfs_init())
+                return 1;
+
+        for (i = 0; i < numfiles; i++)
+                toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+        toi_core_fns = &my_fns;
+
+        if (toi_alloc_init())
+                return 1;
+        if (toi_checksum_init())
+                return 1;
+        if (toi_usm_init())
+                return 1;
+        if (toi_ui_init())
+                return 1;
+        if (toi_poweroff_init())
+                return 1;
+        if (toi_cluster_init())
+                return 1;
+        if (toi_cbw_init())
+                return 1;
+
+        return 0;
+}
+
+late_initcall(core_load);
diff -Nur linux-4.2/kernel/power/tuxonice_incremental.c linux-4.2-pck/kernel/power/tuxonice_incremental.c
--- linux-4.2/kernel/power/tuxonice_incremental.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_incremental.c	2015-09-08 11:20:32.467011976 -0300
@@ -0,0 +1,402 @@
+/*
+ * kernel/power/tuxonice_incremental.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines related to storing incremental images - that
+ * is, retaining an image after an initial cycle and then storing incremental
+ * changes on subsequent hibernations.
+ *
+ * Based in part on...
+ *
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/mm.h>
+#include <linux/tuxonice.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include "tuxonice_pageflags.h"
+#include "tuxonice_builtin.h"
+#include "power.h"
+
+int toi_do_incremental_initcall;
+
+extern void kdb_init(int level);
+extern noinline void kgdb_breakpoint(void);
+
+#undef pr_debug
+#if 0
+#define pr_debug(a, b...) do { printk(a, ##b); } while(0)
+#else
+#define pr_debug(a, b...) do { } while(0)
+#endif
+
+/* Multipliers for offsets within the PTEs */
+#define PTE_LEVEL_MULT (PAGE_SIZE)
+#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+
+/*
+ * Called for each page-table entry we walk. When the entries cross into a
+ * new page, mark the page-table page holding them as untracked, so that
+ * the page tables themselves are excluded from dirty-page tracking.
+ */
+static void note_page(void *addr)
+{
+    static struct page *lastpage;
+    struct page *page;
+
+    page = virt_to_page(addr);
+
+    if (page != lastpage) {
+        unsigned int level;
+        pte_t *pte = lookup_address((unsigned long) addr, &level);
+        struct page *pt_page2 = pte_page(*pte);
+        //debug("Note page %p (=> %p => %p|%ld).\n", addr, pte, pt_page2, page_to_pfn(pt_page2));
+        SetPageTOI_Untracked(pt_page2);
+        lastpage = page;
+    }
+}
+
+static void walk_pte_level(pmd_t addr)
+{
+        int i;
+        pte_t *start;
+
+        start = (pte_t *) pmd_page_vaddr(addr);
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                note_page(start);
+                start++;
+        }
+}
+
+#if PTRS_PER_PMD > 1
+
+static void walk_pmd_level(pud_t addr)
+{
+        int i;
+        pmd_t *start;
+
+        start = (pmd_t *) pud_page_vaddr(addr);
+        for (i = 0; i < PTRS_PER_PMD; i++) {
+                if (!pmd_none(*start)) {
+                        if (pmd_large(*start) || !pmd_present(*start))
+                                note_page(start);
+                        else
+                                walk_pte_level(*start);
+                } else
+                        note_page(start);
+                start++;
+        }
+}
+
+#else
+#define walk_pmd_level(a) walk_pte_level(__pmd(pud_val(a)))
+#define pud_large(a) pmd_large(__pmd(pud_val(a)))
+#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
+#endif
+
+#if PTRS_PER_PUD > 1
+
+static void walk_pud_level(pgd_t addr)
+{
+        int i;
+        pud_t *start;
+
+        start = (pud_t *) pgd_page_vaddr(addr);
+
+        for (i = 0; i < PTRS_PER_PUD; i++) {
+                if (!pud_none(*start)) {
+                        if (pud_large(*start) || !pud_present(*start))
+                                note_page(start);
+                        else
+                                walk_pmd_level(*start);
+                } else
+                        note_page(start);
+
+                start++;
+        }
+}
+
+#else
+#define walk_pud_level(a) walk_pmd_level(__pud(pgd_val(a)))
+#define pgd_large(a) pud_large(__pud(pgd_val(a)))
+#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
+#endif
+
+/*
+ * Not static in the original at the time of writing, so needs renaming here.
+ */
+static void toi_ptdump_walk_pgd_level(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+        pgd_t *start = (pgd_t *) &init_level4_pgt;
+#else
+        pgd_t *start = swapper_pg_dir;
+#endif
+        int i;
+        if (pgd) {
+                start = pgd;
+        }
+
+        for (i = 0; i < PTRS_PER_PGD; i++) {
+                if (!pgd_none(*start)) {
+                        if (pgd_large(*start) || !pgd_present(*start))
+                                note_page(start);
+                        else
+                                walk_pud_level(*start);
+                } else
+                        note_page(start);
+
+                start++;
+        }
+
+        /* Flush out the last page */
+        note_page(start);
+}
+
+#ifdef CONFIG_PARAVIRT
+extern struct pv_info pv_info;
+
+static void toi_set_paravirt_ops_untracked(void) {
+    int i;
+
+    unsigned long pvpfn = page_to_pfn(virt_to_page(__parainstructions)),
+                  pvpfn_end = page_to_pfn(virt_to_page(__parainstructions_end));
+    //debug(KERN_EMERG ".parainstructions goes from pfn %ld to %ld.\n", pvpfn, pvpfn_end);
+    for (i = pvpfn; i <= pvpfn_end; i++) {
+        SetPageTOI_Untracked(pfn_to_page(i));
+    }
+}
+#else
+#define toi_set_paravirt_ops_untracked() do { } while (0)
+#endif
+
+extern void toi_mark_per_cpus_pages_untracked(void);
+
+void toi_untrack_stack(unsigned long *stack)
+{
+    int i;
+    struct page *stack_page = virt_to_page(stack);
+
+    for (i = 0; i < (1 << THREAD_SIZE_ORDER); i++) {
+        pr_debug("Untrack stack page %p.\n", page_address(stack_page + i));
+        SetPageTOI_Untracked(stack_page + i);
+    }
+}
+void toi_untrack_process(struct task_struct *p)
+{
+    SetPageTOI_Untracked(virt_to_page(p));
+    pr_debug("Untrack process %d page %p.\n", p->pid, page_address(virt_to_page(p)));
+
+    toi_untrack_stack(p->stack);
+}
+
+void toi_generate_untracked_map(void)
+{
+    struct task_struct *p, *t;
+    struct page *page;
+    pte_t *pte;
+    int i;
+    unsigned int level;
+    static int been_here = 0;
+
+    if (been_here)
+        return;
+
+    been_here = 1;
+
+    /* Pagetable pages */
+    toi_ptdump_walk_pgd_level(NULL);
+
+    /* Printk buffer - not normally needed but can be helpful for debugging. */
+    //toi_set_logbuf_untracked();
+
+    /* Paravirt ops */
+    toi_set_paravirt_ops_untracked();
+
+    /* Task structs and stacks */
+    for_each_process_thread(p, t) {
+        toi_untrack_process(p);
+        //toi_untrack_stack((unsigned long *) t->thread.sp);
+    }
+
+    for (i = 0; i < NR_CPUS; i++) {
+        struct task_struct *idle = idle_task(i);
+
+        if (idle) {
+            pr_debug("Untrack idle process for CPU %d.\n", i);
+            toi_untrack_process(idle);
+        }
+
+        /* IRQ stack */
+        pr_debug("Untrack IRQ stack for CPU %d.\n", i);
+        toi_untrack_stack((unsigned long *)per_cpu(irq_stack_ptr, i));
+    }
+
+    /* Per CPU data */
+    //pr_debug("Untracking per CPU variable pages.\n");
+    toi_mark_per_cpus_pages_untracked();
+
+    /* Init stack - for bringing up secondary CPUs */
+    page = virt_to_page(init_stack);
+    for (i = 0; i < DIV_ROUND_UP(sizeof(init_stack), PAGE_SIZE); i++) {
+        SetPageTOI_Untracked(page + i);
+    }
+
+    pte = lookup_address((unsigned long) &mmu_cr4_features, &level);
+    SetPageTOI_Untracked(pte_page(*pte));
+    SetPageTOI_Untracked(virt_to_page(trampoline_cr4_features));
+}
+
+/**
+ * toi_reset_dirtiness_one - mark a page clean and write-protect it
+ * @pfn: pfn of the page to reset.
+ * @verbose: whether to log the change.
+ **/
+
+void toi_reset_dirtiness_one(unsigned long pfn, int verbose)
+{
+    struct page *page = pfn_to_page(pfn);
+
+    /**
+     * Don't worry about whether the Dirty flag is
+     * already set. If this is our first call, it
+     * won't be.
+     */
+
+    preempt_disable();
+
+    ClearPageTOI_Dirty(page);
+    SetPageTOI_RO(page);
+    if (verbose)
+        printk(KERN_EMERG "Making page %ld (%p|%p) read only.\n", pfn, page, page_address(page));
+
+    set_memory_ro((unsigned long) page_address(page), 1);
+
+    preempt_enable();
+}
+
+/**
+ * TuxOnIce's incremental image support works by marking all memory apart from
+ * the page tables read-only, then, in the page faults that result, enabling
+ * writing where appropriate and flagging the page as dirty. Free pages are also
+ * marked as dirty and not protected so that, if allocated, they will be included
+ * in the image without further processing.
+ *
+ * toi_reset_dirtiness is called when an image exists and incremental images are
+ * enabled, and each time we resume thereafter. It is not invoked on a fresh boot.
+ *
+ * This routine should be called from a single-cpu-running context to avoid races
+ * in setting the page dirty/read-only flags.
+ *
+ * TODO: Make "it is not invoked on a fresh boot" true when I've finished developing it!
+ *
+ * TODO: Consider Xen paravirt guest boot issues. See arch/x86/mm/pageattr.c.
+ **/
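+/*
+ * Illustrative sketch only (not code from this patch) of the fault-side
+ * handling described above, assuming a hook in the write-protection fault
+ * path is handed the faulting page:
+ *
+ *   if (PageTOI_RO(page) && !PageTOI_Untracked(page)) {
+ *           SetPageTOI_Dirty(page);
+ *           set_memory_rw((unsigned long) page_address(page), 1);
+ *   }
+ *
+ * toi_reset_dirtiness() below then re-applies the protection on the next
+ * cycle via toi_reset_dirtiness_one().
+ */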
+
+int toi_reset_dirtiness(int verbose)
+{
+        struct zone *zone;
+        unsigned long loop;
+        int allocated_map = 0;
+
+        toi_generate_untracked_map();
+
+        if (!free_map) {
+            if (!toi_alloc_bitmap(&free_map))
+                return -ENOMEM;
+            allocated_map = 1;
+        }
+
+        toi_generate_free_page_map();
+
+        pr_debug(KERN_EMERG "Reset dirtiness.\n");
+        for_each_populated_zone(zone) {
+            // 64 bit only. No need to worry about highmem.
+            for (loop = 0; loop < zone->spanned_pages; loop++) {
+                unsigned long pfn = zone->zone_start_pfn + loop;
+                struct page *page;
+                int chunk_size;
+
+                if (!pfn_valid(pfn)) {
+                    continue;
+                }
+
+                chunk_size = toi_size_of_free_region(zone, pfn);
+                if (chunk_size) {
+                    loop += chunk_size - 1;
+                    continue;
+                }
+
+                page = pfn_to_page(pfn);
+
+                if (PageNosave(page) || !saveable_page(zone, pfn)) {
+                    continue;
+                }
+
+                if (PageTOI_Untracked(page)) {
+                    continue;
+                }
+
+                /**
+                 * Do we need to (re)protect the page?
+                 * If it is already protected (PageTOI_RO), there is
+                 * nothing to do - skip the following.
+                 * If it is marked as dirty (PageTOI_Dirty), it was
+                 * either free and has been allocated or has been
+                 * written to and marked dirty. Reset the dirty flag
+                 * and (re)apply the protection.
+                 */
+                if (!PageTOI_RO(page)) {
+                    toi_reset_dirtiness_one(pfn, verbose);
+                }
+            }
+        }
+
+        pr_debug(KERN_EMERG "Done resetting dirtiness.\n");
+
+        if (allocated_map) {
+            toi_free_bitmap(&free_map);
+        }
+        return 0;
+}
+
+static int toi_reset_dirtiness_initcall(void)
+{
+    if (toi_do_incremental_initcall) {
+        pr_info("TuxOnIce: Enabling dirty page tracking.\n");
+        toi_reset_dirtiness(0);
+    }
+    return 1;
+}
+extern void toi_generate_untracked_map(void);
+
+// Leave early_initcall for pages to register untracked sections.
+early_initcall(toi_reset_dirtiness_initcall);
+
+static int __init toi_incremental_initcall_setup(char *str)
+{
+        int value;
+
+        if (sscanf(str, "=%d", &value) && value)
+                toi_do_incremental_initcall = value;
+
+        return 1;
+}
+__setup("toi_incremental_initcall", toi_incremental_initcall_setup);
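+
+/*
+ * For example, booting with "toi_incremental_initcall=1" on the kernel
+ * command line makes the early initcall above enable dirty page tracking
+ * via toi_reset_dirtiness().
+ */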
diff -Nur linux-4.2/kernel/power/tuxonice_io.c linux-4.2-pck/kernel/power/tuxonice_io.c
--- linux-4.2/kernel/power/tuxonice_io.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_io.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,1932 @@
+/*
+ * kernel/power/tuxonice_io.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/version.h>
+#include <linux/utsname.h>
+#include <linux/mount.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+#include <linux/fs_struct.h>
+#include <linux/bio.h>
+#include <linux/fs_uuid.h>
+#include <linux/kmod.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_alloc.h"
+char alt_resume_param[256];
+
+/* Version read from image header at resume */
+static int toi_image_header_version;
+
+#define read_if_version(VERS, VAR, DESC, ERR_ACT) do {                                        \
+        if (likely(toi_image_header_version >= VERS))                                \
+                if (toiActiveAllocator->rw_header_chunk(READ, NULL,                \
+                                        (char *) &VAR, sizeof(VAR))) {                \
+                        abort_hibernate(TOI_FAILED_IO, "Failed to read DESC.");        \
+                        ERR_ACT;                                        \
+                }                                                                \
+} while(0)                                                                        \
+
+/* Variables shared between threads and updated under the mutex */
+static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
+static int io_index, io_nextupdate, io_pc, io_pc_step;
+static DEFINE_MUTEX(io_mutex);
+static DEFINE_PER_CPU(struct page *, last_sought);
+static DEFINE_PER_CPU(struct page *, last_high_page);
+static DEFINE_PER_CPU(char *, checksum_locn);
+static DEFINE_PER_CPU(struct pbe *, last_low_page);
+static atomic_t io_count;
+atomic_t toi_io_workers;
+
+static int using_flusher;
+
+DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
+
+int toi_bio_queue_flusher_should_finish;
+
+int toi_max_workers;
+
+static char *image_version_error = "The image header version is newer than " \
+        "this kernel supports.";
+
+struct toi_module_ops *first_filter;
+
+static atomic_t toi_num_other_threads;
+static DECLARE_WAIT_QUEUE_HEAD(toi_worker_wait_queue);
+enum toi_worker_commands {
+        TOI_IO_WORKER_STOP,
+        TOI_IO_WORKER_RUN,
+        TOI_IO_WORKER_EXIT
+};
+static enum toi_worker_commands toi_worker_command;
+
+/**
+ * toi_attempt_to_parse_resume_device - determine if we can hibernate
+ *
+ * Can we hibernate, using the current resume= parameter?
+ **/
+int toi_attempt_to_parse_resume_device(int quiet)
+{
+        struct list_head *Allocator;
+        struct toi_module_ops *thisAllocator;
+        int result, returning = 0;
+
+        if (toi_activate_storage(0))
+                return 0;
+
+        toiActiveAllocator = NULL;
+        clear_toi_state(TOI_RESUME_DEVICE_OK);
+        clear_toi_state(TOI_CAN_RESUME);
+        clear_result_state(TOI_ABORTED);
+
+        if (!toiNumAllocators) {
+                if (!quiet)
+                        printk(KERN_INFO "TuxOnIce: No storage allocators have "
+                                "been registered. Hibernating will be "
+                                "disabled.\n");
+                goto cleanup;
+        }
+
+        list_for_each(Allocator, &toiAllocators) {
+                thisAllocator = list_entry(Allocator, struct toi_module_ops,
+                                                                type_list);
+
+                /*
+                 * Not sure why you'd want to disable an allocator, but
+                 * we should honour the flag if we're providing it
+                 */
+                if (!thisAllocator->enabled)
+                        continue;
+
+                result = thisAllocator->parse_sig_location(
+                                resume_file, (toiNumAllocators == 1),
+                                quiet);
+
+                switch (result) {
+                case -EINVAL:
+                        /* For this allocator, but not a valid
+                         * configuration. Error already printed. */
+                        goto cleanup;
+
+                case 0:
+                        /* For this allocator and valid. */
+                        toiActiveAllocator = thisAllocator;
+
+                        set_toi_state(TOI_RESUME_DEVICE_OK);
+                        set_toi_state(TOI_CAN_RESUME);
+                        returning = 1;
+                        goto cleanup;
+                }
+        }
+        if (!quiet)
+                printk(KERN_INFO "TuxOnIce: No matching enabled allocator "
+                                "found. Resuming disabled.\n");
+cleanup:
+        toi_deactivate_storage(0);
+        return returning;
+}
+
+void attempt_to_parse_resume_device2(void)
+{
+        toi_prepare_usm();
+        toi_attempt_to_parse_resume_device(0);
+        toi_cleanup_usm();
+}
+
+void save_restore_alt_param(int replace, int quiet)
+{
+        static char resume_param_save[255];
+        static unsigned long toi_state_save;
+
+        if (replace) {
+                toi_state_save = toi_state;
+                strcpy(resume_param_save, resume_file);
+                strcpy(resume_file, alt_resume_param);
+        } else {
+                strcpy(resume_file, resume_param_save);
+                toi_state = toi_state_save;
+        }
+        toi_attempt_to_parse_resume_device(quiet);
+}
+
+void attempt_to_parse_alt_resume_param(void)
+{
+        int ok = 0;
+
+        /* Temporarily set resume_param to the poweroff value */
+        if (!strlen(alt_resume_param))
+                return;
+
+        printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
+        save_restore_alt_param(SAVE, NOQUIET);
+        if (test_toi_state(TOI_CAN_RESUME))
+                ok = 1;
+
+        printk(KERN_INFO "=== Done ===\n");
+        save_restore_alt_param(RESTORE, QUIET);
+
+        /* If not ok, clear the string */
+        if (ok)
+                return;
+
+        printk(KERN_INFO "Can't resume from that location; clearing "
+                        "alt_resume_param.\n");
+        alt_resume_param[0] = '\0';
+}
+
+/**
+ * noresume_reset_modules - reset data structures in case of non resuming
+ *
+ * When we read the start of an image, modules (and especially the
+ * active allocator) might need to reset data structures if we
+ * decide to remove the image rather than resuming from it.
+ **/
+static void noresume_reset_modules(void)
+{
+        struct toi_module_ops *this_filter;
+
+        list_for_each_entry(this_filter, &toi_filters, type_list)
+                if (this_filter->noresume_reset)
+                        this_filter->noresume_reset();
+
+        if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
+                toiActiveAllocator->noresume_reset();
+}
+
+/**
+ * fill_toi_header - fill the hibernate header structure
+ * @sh: The header data structure to be filled.
+ **/
+static int fill_toi_header(struct toi_header *sh)
+{
+        int i, error;
+
+        error = init_header((struct swsusp_info *) sh);
+        if (error)
+                return error;
+
+        sh->pagedir = pagedir1;
+        sh->pageset_2_size = pagedir2.size;
+        sh->param0 = toi_result;
+        sh->param1 = toi_bkd.toi_action;
+        sh->param2 = toi_bkd.toi_debug_state;
+        sh->param3 = toi_bkd.toi_default_console_level;
+        sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
+        for (i = 0; i < 4; i++)
+                sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
+        sh->bkd = boot_kernel_data_buffer;
+        return 0;
+}
+
+/**
+ * rw_init_modules - initialize modules
+ * @rw:                Whether we are reading or writing an image.
+ * @which:        Section of the image being processed.
+ *
+ * Iterate over modules, preparing the ones that will be used to read or write
+ * data.
+ **/
+static int rw_init_modules(int rw, int which)
+{
+        struct toi_module_ops *this_module;
+        /* Initialise page transformers */
+        list_for_each_entry(this_module, &toi_filters, type_list) {
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->rw_init && this_module->rw_init(rw, which)) {
+                        abort_hibernate(TOI_FAILED_MODULE_INIT,
+                                "Failed to initialize the %s filter.",
+                                this_module->name);
+                        return 1;
+                }
+        }
+
+        /* Initialise allocator */
+        if (toiActiveAllocator->rw_init(rw, which)) {
+                abort_hibernate(TOI_FAILED_MODULE_INIT,
+                                "Failed to initialise the allocator.");
+                return 1;
+        }
+
+        /* Initialise other modules */
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    this_module->type == FILTER_MODULE ||
+                    this_module->type == WRITER_MODULE)
+                        continue;
+                if (this_module->rw_init && this_module->rw_init(rw, which)) {
+                        set_abort_result(TOI_FAILED_MODULE_INIT);
+                        printk(KERN_INFO "Setting aborted flag due to module "
+                                        "init failure.\n");
+                        return 1;
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * rw_cleanup_modules - cleanup modules
+ * @rw:        Whether we are reading or writing an image.
+ *
+ * Cleanup components after reading or writing a set of pages.
+ * Only the allocator may fail.
+ **/
+static int rw_cleanup_modules(int rw)
+{
+        struct toi_module_ops *this_module;
+        int result = 0;
+
+        /* Cleanup other modules */
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    this_module->type == FILTER_MODULE ||
+                    this_module->type == WRITER_MODULE)
+                        continue;
+                if (this_module->rw_cleanup)
+                        result |= this_module->rw_cleanup(rw);
+        }
+
+        /* Flush data and cleanup */
+        list_for_each_entry(this_module, &toi_filters, type_list) {
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->rw_cleanup)
+                        result |= this_module->rw_cleanup(rw);
+        }
+
+        result |= toiActiveAllocator->rw_cleanup(rw);
+
+        return result;
+}
+
+static struct page *copy_page_from_orig_page(struct page *orig_page, int is_high)
+{
+        int index, min, max;
+        struct page *high_page = NULL,
+                    **my_last_high_page = raw_cpu_ptr(&last_high_page),
+                    **my_last_sought = raw_cpu_ptr(&last_sought);
+        struct pbe *this, **my_last_low_page = raw_cpu_ptr(&last_low_page);
+        void *compare;
+
+        if (is_high) {
+                if (*my_last_sought && *my_last_high_page &&
+                                *my_last_sought < orig_page)
+                        high_page = *my_last_high_page;
+                else
+                        high_page = (struct page *) restore_highmem_pblist;
+                this = (struct pbe *) kmap(high_page);
+                compare = orig_page;
+        } else {
+                if (*my_last_sought && *my_last_low_page &&
+                                *my_last_sought < orig_page)
+                        this = *my_last_low_page;
+                else
+                        this = restore_pblist;
+                compare = page_address(orig_page);
+        }
+
+        *my_last_sought = orig_page;
+
+        /* Locate page containing pbe */
+        while (this[PBES_PER_PAGE - 1].next &&
+                        this[PBES_PER_PAGE - 1].orig_address < compare) {
+                if (is_high) {
+                        struct page *next_high_page = (struct page *)
+                                this[PBES_PER_PAGE - 1].next;
+                        kunmap(high_page);
+                        this = kmap(next_high_page);
+                        high_page = next_high_page;
+                } else
+                        this = this[PBES_PER_PAGE - 1].next;
+        }
+
+        /* Do a binary search within the page */
+        min = 0;
+        max = PBES_PER_PAGE;
+        index = PBES_PER_PAGE / 2;
+        while (max - min) {
+                if (!this[index].orig_address ||
+                    this[index].orig_address > compare)
+                        max = index;
+                else if (this[index].orig_address == compare) {
+                        if (is_high) {
+                                struct page *page = this[index].address;
+                                *my_last_high_page = high_page;
+                                kunmap(high_page);
+                                return page;
+                        }
+                        *my_last_low_page = this;
+                        return virt_to_page(this[index].address);
+                } else
+                        min = index;
+                index = ((max + min) / 2);
+        };
+
+        if (is_high)
+                kunmap(high_page);
+
+        abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
+                " orig page %p. This[min].orig_address=%p.\n", orig_page,
+                this[index].orig_address);
+        return NULL;
+}
+
+/**
+ * write_next_page - write the next page in a pageset
+ * @data_pfn: The pfn where the next data to write is located.
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn number to write in the image (where the data belongs).
+ *
+ * Get the pfn of the next page to write, map the page if necessary and do the
+ * write.
+ **/
+static int write_next_page(unsigned long *data_pfn, int *my_io_index,
+                unsigned long *write_pfn)
+{
+        struct page *page;
+        char **my_checksum_locn = raw_cpu_ptr(&checksum_locn);
+        int result = 0, was_present;
+
+        *data_pfn = memory_bm_next_pfn(io_map, 0);
+
+        /* Another thread could have beaten us to it. */
+        if (*data_pfn == BM_END_OF_MAP) {
+                if (atomic_read(&io_count)) {
+                        printk(KERN_INFO "Ran out of pfns but io_count is "
+                                        "still %d.\n", atomic_read(&io_count));
+                        BUG();
+                }
+                mutex_unlock(&io_mutex);
+                return -ENODATA;
+        }
+
+        *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+        memory_bm_clear_bit(io_map, 0, *data_pfn);
+        page = pfn_to_page(*data_pfn);
+
+        was_present = kernel_page_present(page);
+        if (!was_present)
+                kernel_map_pages(page, 1, 1);
+
+        if (io_pageset == 1)
+                *write_pfn = memory_bm_next_pfn(pageset1_map, 0);
+        else {
+                *write_pfn = *data_pfn;
+                *my_checksum_locn = tuxonice_get_next_checksum();
+        }
+
+        TOI_TRACE_DEBUG(*data_pfn, "_PS%d_write %d", io_pageset, *my_io_index);
+
+        mutex_unlock(&io_mutex);
+
+        if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
+                return 1;
+
+        result = first_filter->write_page(*write_pfn, TOI_PAGE, page,
+                        PAGE_SIZE);
+
+        if (!was_present)
+                kernel_map_pages(page, 1, 0);
+
+        return result;
+}
+
+/**
+ * read_next_page - read the next page in a pageset
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn in which the data belongs.
+ *
+ * Read a page of the image into our buffer. It can happen (here and in the
+ * write routine) that threads don't get run until after other CPUs have done
+ * all the work. This was the cause of the long-standing issue with
+ * occasionally getting -ENODATA errors at the end of reading the image. We
+ * therefore need to check there's actually a page to read before trying to
+ * retrieve one.
+ **/
+
+static int read_next_page(int *my_io_index, unsigned long *write_pfn,
+                struct page *buffer)
+{
+        unsigned int buf_size = PAGE_SIZE;
+        unsigned long left = atomic_read(&io_count);
+
+        if (!left)
+                return -ENODATA;
+
+        /* Start off assuming the page we read isn't resaved */
+        *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+        mutex_unlock(&io_mutex);
+
+        /*
+         * Are we aborting? If so, don't submit any more I/O as
+         * resetting the resume_attempted flag (from ui.c) will
+         * clear the bdev flags, making this thread oops.
+         */
+        if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+                atomic_dec(&toi_io_workers);
+                if (!atomic_read(&toi_io_workers)) {
+                        /*
+                         * So we can be sure we'll have memory for
+                         * marking that we haven't resumed.
+                         */
+                        rw_cleanup_modules(READ);
+                        set_toi_state(TOI_IO_STOPPED);
+                }
+                while (1)
+                        schedule();
+        }
+
+        /*
+         * See toi_bio_read_page in tuxonice_bio.c:
+         * read the next page in the image.
+         */
+        return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size);
+}
+
+static void use_read_page(unsigned long write_pfn, struct page *buffer)
+{
+        struct page *final_page = pfn_to_page(write_pfn),
+                    *copy_page = final_page;
+        char *virt, *buffer_virt;
+        int was_present, cpu = smp_processor_id();
+        unsigned long idx = 0;
+
+        if (io_pageset == 1 && (!pageset1_copy_map ||
+                        !memory_bm_test_bit(pageset1_copy_map, cpu, write_pfn))) {
+                int is_high = PageHighMem(final_page);
+                copy_page = copy_page_from_orig_page(is_high ? (void *) write_pfn : final_page, is_high);
+        }
+
+        if (!memory_bm_test_bit(io_map, cpu, write_pfn)) {
+                int test = !memory_bm_test_bit(io_map, cpu, write_pfn);
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Discard %ld (%d).", write_pfn, test);
+                mutex_lock(&io_mutex);
+                idx = atomic_add_return(1, &io_count);
+                mutex_unlock(&io_mutex);
+                return;
+        }
+
+        virt = kmap(copy_page);
+        buffer_virt = kmap(buffer);
+        was_present = kernel_page_present(copy_page);
+        if (!was_present)
+                kernel_map_pages(copy_page, 1, 1);
+        memcpy(virt, buffer_virt, PAGE_SIZE);
+        if (!was_present)
+                kernel_map_pages(copy_page, 1, 0);
+        kunmap(copy_page);
+        kunmap(buffer);
+        memory_bm_clear_bit(io_map, cpu, write_pfn);
+        TOI_TRACE_DEBUG(write_pfn, "_PS%d_read", io_pageset);
+}
+
+static unsigned long status_update(int writing, unsigned long done,
+                unsigned long ticks)
+{
+        int cs_index = writing ? 0 : 1;
+        unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks;
+        unsigned long msec = jiffies_to_msecs(abs(ticks_so_far));
+        unsigned long pgs_per_s, estimate = 0, pages_left;
+
+        if (msec) {
+                pages_left = io_barmax - done;
+                pgs_per_s = 1000 * done / msec;
+                if (pgs_per_s)
+                        estimate = DIV_ROUND_UP(pages_left, pgs_per_s);
+        }
+
+        if (estimate && ticks > HZ / 2)
+                return toi_update_status(done, io_barmax,
+                        " %d/%d MB (%lu sec left)",
+                        MB(done+1), MB(io_barmax), estimate);
+
+        return toi_update_status(done, io_barmax, " %d/%d MB",
+                MB(done+1), MB(io_barmax));
+}
+
+/**
+ * worker_rw_loop - main loop to read/write pages
+ *
+ * The main I/O loop for reading or writing pages. The io_map bitmap is used to
+ * track the pages to read/write.
+ * If we are reading, the pages are loaded to their final (mapped) pfn.
+ * Data is non zero iff this is a thread started via start_other_threads.
+ * In that case, we stay in here until told to quit.
+ **/
+static int worker_rw_loop(void *data)
+{
+        unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4,
+                      jif_index = 1, start_time = jiffies, thread_num;
+        int result = 0, my_io_index = 0, last_worker;
+        struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
+        cpumask_var_t orig_mask;
+
+        if (!alloc_cpumask_var(&orig_mask, GFP_KERNEL)) {
+                printk(KERN_EMERG "Failed to allocate cpumask for TuxOnIce I/O thread %ld.\n", (unsigned long) data);
+                result = -ENOMEM;
+                goto out;
+        }
+
+        cpumask_copy(orig_mask, tsk_cpus_allowed(current));
+
+        current->flags |= PF_NOFREEZE;
+
+top:
+        mutex_lock(&io_mutex);
+        thread_num = atomic_read(&toi_io_workers);
+
+        cpumask_copy(tsk_cpus_allowed(current), orig_mask);
+        schedule();
+
+        atomic_inc(&toi_io_workers);
+
+        while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
+                !(io_write && test_result_state(TOI_ABORTED)) &&
+                toi_worker_command == TOI_IO_WORKER_RUN) {
+                if (!thread_num && jiffies > next_jiffies) {
+                        next_jiffies += HZ / 4;
+                        if (toiActiveAllocator->update_throughput_throttle)
+                                toiActiveAllocator->update_throughput_throttle(
+                                                jif_index);
+                        jif_index++;
+                }
+
+                /*
+                 * What page to use? If reading, don't know yet which page's
+                 * data will be read, so always use the buffer. If writing,
+                 * use the copy (Pageset1) or original page (Pageset2), but
+                 * always write the pfn of the original page.
+                 */
+                if (io_write)
+                        result = write_next_page(&data_pfn, &my_io_index,
+                                        &write_pfn);
+                else /* Reading */
+                        result = read_next_page(&my_io_index, &write_pfn,
+                                        buffer);
+
+                if (result) {
+                        mutex_lock(&io_mutex);
+                        /* Nothing to do? */
+                        if (result == -ENODATA) {
+                                toi_message(TOI_IO, TOI_VERBOSE, 0,
+                                        "Thread %d has no more work.",
+                                        smp_processor_id());
+                                break;
+                        }
+
+                        io_result = result;
+
+                        if (io_write) {
+                                printk(KERN_INFO "Write chunk returned %d.\n",
+                                                result);
+                                abort_hibernate(TOI_FAILED_IO,
+                                        "Failed to write a chunk of the "
+                                        "image.");
+                                break;
+                        }
+
+                        if (io_pageset == 1) {
+                                printk(KERN_ERR "\nBreaking out of I/O loop "
+                                        "because of result code %d.\n", result);
+                                break;
+                        }
+                        panic("Read chunk returned (%d)", result);
+                }
+
+                /*
+                 * Discard reads of resaved pages while reading ps2
+                 * and unwanted pages while rereading ps2 when aborting.
+                 */
+                if (!io_write) {
+                        if (!PageResave(pfn_to_page(write_pfn)))
+                                use_read_page(write_pfn, buffer);
+                        else {
+                                mutex_lock(&io_mutex);
+                                toi_message(TOI_IO, TOI_VERBOSE, 0,
+                                                "Resaved %ld.", write_pfn);
+                                atomic_inc(&io_count);
+                                mutex_unlock(&io_mutex);
+                        }
+                }
+
+                if (!thread_num) {
+                        if(my_io_index + io_base > io_nextupdate)
+                                io_nextupdate = status_update(io_write,
+                                                my_io_index + io_base,
+                                                jiffies - start_time);
+
+                        if (my_io_index > io_pc) {
+                                printk(KERN_CONT "...%d%%", 20 * io_pc_step);
+                                io_pc_step++;
+                                io_pc = io_finish_at * io_pc_step / 5;
+                        }
+                }
+
+                toi_cond_pause(0, NULL);
+
+                /*
+                 * Subtle: If there's less I/O still to be done than threads
+                 * running, quit. This stops us doing I/O beyond the end of
+                 * the image when reading.
+                 *
+                 * Possible race condition. Two threads could do the test at
+                 * the same time; one should exit and one should continue.
+                 * Therefore we take the mutex before comparing and exiting.
+                 */
+
+                mutex_lock(&io_mutex);
+        }
+
+        last_worker = atomic_dec_and_test(&toi_io_workers);
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers));
+        mutex_unlock(&io_mutex);
+
+        if ((unsigned long) data && toi_worker_command != TOI_IO_WORKER_EXIT) {
+                /* Were we the last thread and we're using a flusher thread? */
+                if (last_worker && using_flusher) {
+                        toiActiveAllocator->finish_all_io();
+                }
+                /* First, if we're doing I/O, wait for it to finish */
+                wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_RUN);
+                /* Then wait to be told what to do next */
+                wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_STOP);
+                if (toi_worker_command == TOI_IO_WORKER_RUN)
+                        goto top;
+        }
+
+        if (thread_num)
+                atomic_dec(&toi_num_other_threads);
+
+out:
+        toi_message(TOI_IO, TOI_LOW, 0, "Thread %d exiting.", thread_num);
+        toi__free_page(28, buffer);
+        free_cpumask_var(orig_mask);
+
+        return result;
+}
+
+int toi_start_other_threads(void)
+{
+        int cpu;
+        struct task_struct *p;
+        int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1;
+        unsigned long num_started = 0;
+
+        if (test_action_state(TOI_NO_MULTITHREADED_IO))
+                return 0;
+
+        toi_worker_command = TOI_IO_WORKER_STOP;
+
+        for_each_online_cpu(cpu) {
+                if (num_started == to_start)
+                        break;
+
+                if (cpu == smp_processor_id())
+                        continue;
+
+                p = kthread_create_on_node(worker_rw_loop, (void *) num_started + 1,
+                                cpu_to_node(cpu), "ktoi_io/%d", cpu);
+                if (IS_ERR(p)) {
+                        printk(KERN_ERR "ktoi_io for %i failed\n", cpu);
+                        continue;
+                }
+                kthread_bind(p, cpu);
+                p->flags |= PF_MEMALLOC;
+                wake_up_process(p);
+                num_started++;
+                atomic_inc(&toi_num_other_threads);
+        }
+
+        toi_message(TOI_IO, TOI_LOW, 0, "Started %lu threads.", num_started);
+        return num_started;
+}
+
+void toi_stop_other_threads(void)
+{
+        toi_message(TOI_IO, TOI_LOW, 0, "Stopping other threads.");
+        toi_worker_command = TOI_IO_WORKER_EXIT;
+        wake_up(&toi_worker_wait_queue);
+}
+
+/**
+ * do_rw_loop - main highlevel function for reading or writing pages
+ *
+ * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
+ **/
+static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
+                int base, int barmax, int pageset)
+{
+        int index = 0, cpu, result = 0, workers_started;
+        unsigned long pfn, next;
+
+        first_filter = toi_get_next_filter(NULL);
+
+        if (!finish_at)
+                return 0;
+
+        io_write = write;
+        io_finish_at = finish_at;
+        io_base = base;
+        io_barmax = barmax;
+        io_pageset = pageset;
+        io_index = 0;
+        io_pc = io_finish_at / 5;
+        io_pc_step = 1;
+        io_result = 0;
+        io_nextupdate = base + 1;
+        toi_bio_queue_flusher_should_finish = 0;
+
+        for_each_online_cpu(cpu) {
+                per_cpu(last_sought, cpu) = NULL;
+                per_cpu(last_low_page, cpu) = NULL;
+                per_cpu(last_high_page, cpu) = NULL;
+        }
+
+        /* Ensure all bits clear */
+        memory_bm_clear(io_map);
+
+        memory_bm_position_reset(io_map);
+        next = memory_bm_next_pfn(io_map, 0);
+
+        BUG_ON(next != BM_END_OF_MAP);
+
+        /* Set the bits for the pages to write */
+        memory_bm_position_reset(pageflags);
+
+        pfn = memory_bm_next_pfn(pageflags, 0);
+        toi_trace_index++;
+
+        while (pfn != BM_END_OF_MAP && index < finish_at) {
+                TOI_TRACE_DEBUG(pfn, "_io_pageset_%d (%d/%d)", pageset, index + 1, finish_at);
+                memory_bm_set_bit(io_map, 0, pfn);
+                pfn = memory_bm_next_pfn(pageflags, 0);
+                index++;
+        }
+
+        BUG_ON(next != BM_END_OF_MAP || index < finish_at);
+
+        memory_bm_position_reset(io_map);
+        toi_trace_index++;
+
+        atomic_set(&io_count, finish_at);
+
+        memory_bm_position_reset(pageset1_map);
+
+        mutex_lock(&io_mutex);
+
+        clear_toi_state(TOI_IO_STOPPED);
+
+        using_flusher = (atomic_read(&toi_num_other_threads) &&
+                         toiActiveAllocator->io_flusher &&
+                         !test_action_state(TOI_NO_FLUSHER_THREAD));
+
+        workers_started = atomic_read(&toi_num_other_threads);
+
+        memory_bm_position_reset(io_map);
+        memory_bm_position_reset(pageset1_copy_map);
+
+        toi_worker_command = TOI_IO_WORKER_RUN;
+        wake_up(&toi_worker_wait_queue);
+
+        mutex_unlock(&io_mutex);
+
+        if (using_flusher)
+                result = toiActiveAllocator->io_flusher(write);
+        else
+                worker_rw_loop(NULL);
+
+        while (atomic_read(&toi_io_workers))
+                schedule();
+
+        printk(KERN_CONT "\n");
+
+        toi_worker_command = TOI_IO_WORKER_STOP;
+        wake_up(&toi_worker_wait_queue);
+
+        if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+                if (!atomic_read(&toi_io_workers)) {
+                        rw_cleanup_modules(READ);
+                        set_toi_state(TOI_IO_STOPPED);
+                }
+                while (1)
+                        schedule();
+        }
+        set_toi_state(TOI_IO_STOPPED);
+
+        if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
+                unsigned long next;
+
+                toi_update_status(io_base + io_finish_at, io_barmax,
+                                " %d/%d MB ",
+                                MB(io_base + io_finish_at), MB(io_barmax));
+
+                memory_bm_position_reset(io_map);
+                next = memory_bm_next_pfn(io_map, 0);
+                if (next != BM_END_OF_MAP) {
+                        printk(KERN_INFO "Finished I/O loop but still work to "
+                                        "do?\nFinish at = %d. io_count = %d.\n",
+                                        finish_at, atomic_read(&io_count));
+                        printk(KERN_INFO "I/O bitmap still records work to do."
+                                        "%ld.\n", next);
+                        BUG();
+                        do {
+                                cpu_relax();
+                        } while (0);
+                }
+        }
+
+        return io_result ? io_result : result;
+}
+
+/**
+ * write_pageset - write a pageset to disk.
+ * @pagedir:        Which pagedir to write.
+ *
+ * Returns:
+ *        Zero on success or -1 on failure.
+ **/
+int write_pageset(struct pagedir *pagedir)
+{
+        int finish_at, base = 0;
+        int barmax = pagedir1.size + pagedir2.size;
+        long error = 0;
+        struct memory_bitmap *pageflags;
+        unsigned long start_time, end_time;
+
+        /*
+         * Even if there is nothing to read or write, the allocator
+         * may need the init/cleanup for its housekeeping (e.g.
+         * pageset1 may start where pageset2 ends when writing).
+         */
+        finish_at = pagedir->size;
+
+        if (pagedir->id == 1) {
+                toi_prepare_status(DONT_CLEAR_BAR,
+                                "Writing kernel & process data...");
+                base = pagedir2.size;
+                if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+                    test_action_state(TOI_TEST_BIO))
+                        pageflags = pageset1_map;
+                else
+                        pageflags = pageset1_copy_map;
+        } else {
+                toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
+                pageflags = pageset2_map;
+        }
+
+        start_time = jiffies;
+
+        if (rw_init_modules(WRITE, pagedir->id)) {
+                abort_hibernate(TOI_FAILED_MODULE_INIT,
+                                "Failed to initialise modules for writing.");
+                error = 1;
+        }
+
+        if (!error)
+                error = do_rw_loop(WRITE, finish_at, pageflags, base, barmax,
+                                pagedir->id);
+
+        if (rw_cleanup_modules(WRITE) && !error) {
+                abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+                                "Failed to cleanup after writing.");
+                error = 1;
+        }
+
+        end_time = jiffies;
+
+        if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+                toi_bkd.toi_io_time[0][0] += finish_at;
+                toi_bkd.toi_io_time[0][1] += (end_time - start_time);
+        }
+
+        return error;
+}
+
+/**
+ * read_pageset - highlevel function to read a pageset from disk
+ * @pagedir:                        pageset to read
+ * @overwrittenpagesonly:        Whether to read the whole pageset or
+ *                                only part of it.
+ *
+ * Returns:
+ *        Zero on success or -1 on failure.
+ **/
+static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
+{
+        int result = 0, base = 0;
+        int finish_at = pagedir->size;
+        int barmax = pagedir1.size + pagedir2.size;
+        struct memory_bitmap *pageflags;
+        unsigned long start_time, end_time;
+
+        if (pagedir->id == 1) {
+                toi_prepare_status(DONT_CLEAR_BAR,
+                                "Reading kernel & process data...");
+                pageflags = pageset1_map;
+        } else {
+                toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
+                if (overwrittenpagesonly) {
+                        barmax = min(pagedir1.size, pagedir2.size);
+                        finish_at = min(pagedir1.size, pagedir2.size);
+                } else
+                        base = pagedir1.size;
+                pageflags = pageset2_map;
+        }
+
+        start_time = jiffies;
+
+        if (rw_init_modules(READ, pagedir->id)) {
+                toiActiveAllocator->remove_image();
+                result = 1;
+        } else
+                result = do_rw_loop(READ, finish_at, pageflags, base, barmax,
+                                pagedir->id);
+
+        if (rw_cleanup_modules(READ) && !result) {
+                abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+                                "Failed to cleanup after reading.");
+                result = 1;
+        }
+
+        /* Statistics */
+        end_time = jiffies;
+
+        if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+                toi_bkd.toi_io_time[1][0] += finish_at;
+                toi_bkd.toi_io_time[1][1] += (end_time - start_time);
+        }
+
+        return result;
+}
+
+/**
+ * write_module_configs - store the modules configuration
+ *
+ * The configuration for each module is stored in the image header.
+ * Returns: Int
+ *        Zero on success, Error value otherwise.
+ **/
+static int write_module_configs(void)
+{
+        struct toi_module_ops *this_module;
+        char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
+        int len, index = 1;
+        struct toi_module_header toi_module_header;
+
+        if (!buffer) {
+                printk(KERN_INFO "Failed to allocate a buffer for saving "
+                                "module configuration info.\n");
+                return -ENOMEM;
+        }
+
+        /*
+         * We have to know which data goes with which module, so we at
+         * least write a length of zero for a module. Note that we are
+         * also assuming every module's config data takes <= PAGE_SIZE.
+         */
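+        /*
+         * On-disk record per module (sketch of the writes below):
+         *   struct toi_module_header   - name, type, index, enabled flag
+         *   int len                    - length of the config data (may be 0)
+         *   char data[len]             - output of save_config_info(), if any
+         * The list is terminated by a header whose name[0] is '\0'.
+         */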
+
+        /* For each module (in registration order) */
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled || !this_module->storage_needed ||
+                    (this_module->type == WRITER_MODULE &&
+                     toiActiveAllocator != this_module))
+                        continue;
+
+                /* Get the data from the module */
+                len = 0;
+                if (this_module->save_config_info)
+                        len = this_module->save_config_info(buffer);
+
+                /* Save the details of the module */
+                toi_module_header.enabled = this_module->enabled;
+                toi_module_header.type = this_module->type;
+                toi_module_header.index = index++;
+                strncpy(toi_module_header.name, this_module->name,
+                                        sizeof(toi_module_header.name));
+                toiActiveAllocator->rw_header_chunk(WRITE,
+                                this_module,
+                                (char *) &toi_module_header,
+                                sizeof(toi_module_header));
+
+                /* Save the size of the data and any data returned */
+                toiActiveAllocator->rw_header_chunk(WRITE,
+                                this_module,
+                                (char *) &len, sizeof(int));
+                if (len)
+                        toiActiveAllocator->rw_header_chunk(
+                                WRITE, this_module, buffer, len);
+        }
+
+        /* Write a blank header to terminate the list */
+        toi_module_header.name[0] = '\0';
+        toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                        (char *) &toi_module_header, sizeof(toi_module_header));
+
+        toi_free_page(22, (unsigned long) buffer);
+        return 0;
+}
+
+/**
+ * read_one_module_config - read and configure one module
+ *
+ * Read the configuration for one module, and configure the module
+ * to match if it is loaded.
+ *
+ * Returns: Int
+ *        Zero on success, Error value otherwise.
+ **/
+static int read_one_module_config(struct toi_module_header *header)
+{
+        struct toi_module_ops *this_module;
+        int result, len;
+        char *buffer;
+
+        /* Find the module */
+        this_module = toi_find_module_given_name(header->name);
+
+        if (!this_module) {
+                if (header->enabled) {
+                        toi_early_boot_message(1, TOI_CONTINUE_REQ,
+                                "It looks like we need module %s for reading "
+                                "the image but it hasn't been registered.\n",
+                                header->name);
+                        if (!(test_toi_state(TOI_CONTINUE_REQ)))
+                                return -EINVAL;
+                } else
+                        printk(KERN_INFO "Module %s configuration data found, "
+                                "but the module hasn't registered. Looks like "
+                                "it was disabled, so we're ignoring its data.\n",
+                                header->name);
+        }
+
+        /* Get the length of the data (if any) */
+        result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
+                        sizeof(int));
+        if (result) {
+                printk(KERN_ERR "Failed to read the length of the module %s's"
+                                " configuration data.\n",
+                                header->name);
+                return -EINVAL;
+        }
+
+        /* Read any data and pass to the module (if we found one) */
+        if (!len)
+                return 0;
+
+        buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
+
+        if (!buffer) {
+                printk(KERN_ERR "Failed to allocate a buffer for reloading "
+                                "module configuration info.\n");
+                return -ENOMEM;
+        }
+
+        toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
+
+        if (!this_module)
+                goto out;
+
+        if (!this_module->load_config_info)
+                printk(KERN_ERR "Huh? Module %s appears to have a "
+                                "save_config_info, but not a load_config_info "
+                                "function!\n", this_module->name);
+        else
+                this_module->load_config_info(buffer, len);
+
+        /*
+         * Now move this module to the tail of its lists. This will put it in
+         * order. Any new modules will end up at the top of the lists. They
+         * should have been set to disabled when loaded (people will
+         * normally not edit an initrd to load a new module and then hibernate
+         * without using it!).
+         */
+
+        toi_move_module_tail(this_module);
+
+        this_module->enabled = header->enabled;
+
+out:
+        toi_free_page(23, (unsigned long) buffer);
+        return 0;
+}
+
+/**
+ * read_module_configs - reload module configurations from the image header.
+ *
+ * Returns: Int
+ *        Zero on success or an error code.
+ **/
+static int read_module_configs(void)
+{
+        int result = 0;
+        struct toi_module_header toi_module_header;
+        struct toi_module_ops *this_module;
+
+        /* All modules are initially disabled. That way, if we have a module
+         * loaded now that wasn't loaded when we hibernated, it won't be used
+         * in trying to read the data.
+         */
+        list_for_each_entry(this_module, &toi_modules, module_list)
+                this_module->enabled = 0;
+
+        /* Get the first module header */
+        result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+                        (char *) &toi_module_header,
+                        sizeof(toi_module_header));
+        if (result) {
+                printk(KERN_ERR "Failed to read the next module header.\n");
+                return -EINVAL;
+        }
+
+        /* For each module (in registration order) */
+        while (toi_module_header.name[0]) {
+                result = read_one_module_config(&toi_module_header);
+
+                if (result)
+                        return -EINVAL;
+
+                /* Get the next module header */
+                result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+                                (char *) &toi_module_header,
+                                sizeof(toi_module_header));
+
+                if (result) {
+                        printk(KERN_ERR "Failed to read the next module "
+                                        "header.\n");
+                        return -EINVAL;
+                }
+        }
+
+        return 0;
+}
+
+static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev)
+{
+        return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1;
+}
+
+int fs_info_space_needed(void)
+{
+        const struct super_block *sb;
+        int result = sizeof(int);
+
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                struct fs_info *fs;
+
+                if (!sb->s_bdev)
+                        continue;
+
+                fs = fs_info_from_block_dev(sb->s_bdev);
+                if (save_fs_info(fs, sb->s_bdev))
+                        result += 16 + sizeof(dev_t) + sizeof(int) +
+                                fs->last_mount_size;
+                free_fs_info(fs);
+        }
+        return result;
+}
+
+static int fs_info_num_to_save(void)
+{
+        const struct super_block *sb;
+        int to_save = 0;
+
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                struct fs_info *fs;
+
+                if (!sb->s_bdev)
+                        continue;
+
+                fs = fs_info_from_block_dev(sb->s_bdev);
+                if (save_fs_info(fs, sb->s_bdev))
+                        to_save++;
+                free_fs_info(fs);
+        }
+
+        return to_save;
+}
+
+static int fs_info_save(void)
+{
+        const struct super_block *sb;
+        int to_save = fs_info_num_to_save();
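+        /*
+         * Record format (sketch of the writes below): an int count, then
+         * for each saved filesystem a 16-byte uuid, a dev_t, an int giving
+         * the last-mount data length, and the last-mount data itself.
+         * These are verified at resume time by fs_info_load_and_check_one().
+         */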
+
+        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save,
+                                sizeof(int))) {
+                abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info"
+                                " to save.");
+                return -EIO;
+        }
+
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                struct fs_info *fs;
+
+                if (!sb->s_bdev)
+                        continue;
+
+                fs = fs_info_from_block_dev(sb->s_bdev);
+                if (save_fs_info(fs, sb->s_bdev)) {
+                        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                                        &fs->uuid[0], 16)) {
+                                abort_hibernate(TOI_FAILED_IO, "Failed to "
+                                                "write uuid.");
+                                return -EIO;
+                        }
+                        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                                        (char *) &fs->dev_t, sizeof(dev_t))) {
+                                abort_hibernate(TOI_FAILED_IO, "Failed to "
+                                                "write dev_t.");
+                                return -EIO;
+                        }
+                        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                                        (char *) &fs->last_mount_size, sizeof(int))) {
+                                abort_hibernate(TOI_FAILED_IO, "Failed to "
+                                                "write last mount length.");
+                                return -EIO;
+                        }
+                        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                                        fs->last_mount, fs->last_mount_size)) {
+                                abort_hibernate(TOI_FAILED_IO, "Failed to "
+                                                "write uuid.");
+                                return -EIO;
+                        }
+                }
+                free_fs_info(fs);
+        }
+        return 0;
+}
+
+static int fs_info_load_and_check_one(void)
+{
+        char uuid[16], *last_mount;
+        int result = 0, ln;
+        dev_t dev_t;
+        struct block_device *dev;
+        struct fs_info *fs_info, seek;
+
+        if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) {
+                abort_hibernate(TOI_FAILED_IO, "Failed to read uuid.");
+                return -EIO;
+        }
+
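+        /*
+         * Assumption from the macro name and its other uses: the dev_t
+         * field is only read when the image header version is at least 3,
+         * so older images simply omit it.
+         */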
+        read_if_version(3, dev_t, "uuid dev_t field", return -EIO);
+
+        if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln,
+                                sizeof(int))) {
+                abort_hibernate(TOI_FAILED_IO,
+                                "Failed to read last mount size.");
+                return -EIO;
+        }
+
+        last_mount = kzalloc(ln, GFP_KERNEL);
+
+        if (!last_mount)
+                return -ENOMEM;
+
+        if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) {
+                abort_hibernate(TOI_FAILED_IO,
+                                "Failed to read last mount timestamp.");
+                result = -EIO;
+                goto out_lmt;
+        }
+
+        strncpy((char *) &seek.uuid, uuid, 16);
+        seek.dev_t = dev_t;
+        seek.last_mount_size = ln;
+        seek.last_mount = last_mount;
+        dev_t = blk_lookup_fs_info(&seek);
+        if (!dev_t)
+                goto out_lmt;
+
+        dev = toi_open_by_devnum(dev_t);
+
+        fs_info = fs_info_from_block_dev(dev);
+        if (fs_info && !IS_ERR(fs_info)) {
+                if (ln != fs_info->last_mount_size) {
+                        printk(KERN_EMERG "Found matching uuid but last mount "
+                                        "time lengths differ?! "
+                                        "(%d vs %d).\n", ln,
+                                        fs_info->last_mount_size);
+                        result = -EINVAL;
+                } else {
+                        char buf[BDEVNAME_SIZE];
+                        result = !!memcmp(fs_info->last_mount, last_mount, ln);
+                        if (result)
+                                printk(KERN_EMERG "Last mount time for %s has "
+                                        "changed!\n", bdevname(dev, buf));
+                }
+        }
+        toi_close_bdev(dev);
+        free_fs_info(fs_info);
+out_lmt:
+        kfree(last_mount);
+        return result;
+}
+
+static int fs_info_load_and_check(void)
+{
+        int to_do, result = 0;
+
+        if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do,
+                                sizeof(int))) {
+                abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info "
+                                "to load.");
+                return -EIO;
+        }
+
+        while (to_do--)
+                result |= fs_info_load_and_check_one();
+
+        return result;
+}
+
+/**
+ * write_image_header - write the image header after writing the image proper
+ *
+ * Returns: Int
+ *        Zero on success, error value otherwise.
+ **/
+int write_image_header(void)
+{
+        int ret;
+        int total = pagedir1.size + pagedir2.size + 2;
+        char *header_buffer = NULL;
+
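+        /*
+         * Image header layout (sketch of the writes below): struct
+         * toi_header, toi_max_workers, the filesystem mount-time records
+         * (fs_info_save), the per-module configuration records
+         * (write_module_configs), and finally the pageset1_map bitmap.
+         */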
+        /* Now prepare to write the header */
+        ret = toiActiveAllocator->write_header_init();
+        if (ret) {
+                abort_hibernate(TOI_FAILED_MODULE_INIT,
+                                "Active allocator's write_header_init"
+                                " function failed.");
+                goto write_image_header_abort;
+        }
+
+        /* Get a buffer */
+        header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
+        if (!header_buffer) {
+                abort_hibernate(TOI_OUT_OF_MEMORY,
+                        "Out of memory when trying to get page for header!");
+                goto write_image_header_abort;
+        }
+
+        /* Write hibernate header */
+        if (fill_toi_header((struct toi_header *) header_buffer)) {
+                abort_hibernate(TOI_OUT_OF_MEMORY,
+                        "Failure to fill header information!");
+                goto write_image_header_abort;
+        }
+
+        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                        header_buffer, sizeof(struct toi_header))) {
+                abort_hibernate(TOI_OUT_OF_MEMORY,
+                        "Failure to write header info.");
+                goto write_image_header_abort;
+        }
+
+        if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+                        (char *) &toi_max_workers, sizeof(toi_max_workers))) {
+                abort_hibernate(TOI_OUT_OF_MEMORY,
+                        "Failure to write the number of workers to use.");
+                goto write_image_header_abort;
+        }
+
+        /* Write filesystem info */
+        if (fs_info_save())
+                goto write_image_header_abort;
+
+        /* Write module configurations */
+        ret = write_module_configs();
+        if (ret) {
+                abort_hibernate(TOI_FAILED_IO,
+                                "Failed to write module configs.");
+                goto write_image_header_abort;
+        }
+
+        if (memory_bm_write(pageset1_map,
+                                toiActiveAllocator->rw_header_chunk)) {
+                abort_hibernate(TOI_FAILED_IO,
+                                "Failed to write bitmaps.");
+                goto write_image_header_abort;
+        }
+
+        /* Flush data and let allocator cleanup */
+        if (toiActiveAllocator->write_header_cleanup()) {
+                abort_hibernate(TOI_FAILED_IO,
+                                "Failed to cleanup writing header.");
+                goto write_image_header_abort_no_cleanup;
+        }
+
+        if (test_result_state(TOI_ABORTED))
+                goto write_image_header_abort_no_cleanup;
+
+        toi_update_status(total, total, NULL);
+
+out:
+        if (header_buffer)
+                toi_free_page(24, (unsigned long) header_buffer);
+        return ret;
+
+write_image_header_abort:
+        toiActiveAllocator->write_header_cleanup();
+write_image_header_abort_no_cleanup:
+        ret = -1;
+        goto out;
+}
+
+/**
+ * sanity_check - check the header
+ * @sh:        the header which was saved at hibernate time.
+ *
+ * Perform a few checks, seeking to ensure that the kernel being
+ * booted matches the one that hibernated. They need to match so we can
+ * be _sure_ things will work. Resuming with a different kernel is not
+ * absolutely impossible, just not assured.
+ **/
+static char *sanity_check(struct toi_header *sh)
+{
+        char *reason = check_image_kernel((struct swsusp_info *) sh);
+
+        if (reason)
+                return reason;
+
+        if (!test_action_state(TOI_IGNORE_ROOTFS)) {
+                const struct super_block *sb;
+                list_for_each_entry(sb, &super_blocks, s_list) {
+                        if ((!(sb->s_flags & MS_RDONLY)) &&
+                            (sb->s_type->fs_flags & FS_REQUIRES_DEV))
+                                return "Device backed fs has been mounted "
+                                        "rw prior to resume or initrd/ramfs "
+                                        "is mounted rw.";
+                }
+        }
+
+        return NULL;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);
+
+#define FREEZE_IN_PROGRESS (~0)
+
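+/*
+ * freeze_result is set to FREEZE_IN_PROGRESS before freeze_work is
+ * scheduled; do_freeze() overwrites it with the real return value of
+ * freeze_processes() and wakes anyone waiting on freeze_wait.
+ */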
+static int freeze_result;
+
+static void do_freeze(struct work_struct *dummy)
+{
+        freeze_result = freeze_processes();
+        wake_up(&freeze_wait);
+        trap_non_toi_io = 1;
+}
+
+static DECLARE_WORK(freeze_work, do_freeze);
+
+/**
+ * __read_pageset1 - test for the existence of an image and attempt to load it
+ *
+ * Returns:        Int
+ *        Zero if image found and pageset1 successfully loaded.
+ *        Error if no image found or loaded.
+ **/
+static int __read_pageset1(void)
+{
+        int i, result = 0;
+        char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
+             *sanity_error = NULL;
+        struct toi_header *toi_header;
+
+        if (!header_buffer) {
+                printk(KERN_INFO "Unable to allocate a page for reading the "
+                                "signature.\n");
+                return -ENOMEM;
+        }
+
+        /* Check for an image */
+        result = toiActiveAllocator->image_exists(1);
+        if (result == 3) {
+                result = -ENODATA;
+                toi_early_boot_message(1, 0, "The signature from an older "
+                                "version of TuxOnIce has been detected.");
+                goto out_remove_image;
+        }
+
+        if (result != 1) {
+                result = -ENODATA;
+                noresume_reset_modules();
+                printk(KERN_INFO "TuxOnIce: No image found.\n");
+                goto out;
+        }
+
+        /*
+         * Prepare the active allocator for reading the image header. The
+         * active allocator might read its own configuration.
+         *
+         * NB: This call may never return: if the signature looks like it
+         * belongs to a different image (for instance, the device ids look
+         * erroneous, 2.4 vs 2.6), we warn the user and they may choose to
+         * reboot; alternatively, the image may be unavailable because it
+         * was stored over a network connection.
+         */
+
+        result = toiActiveAllocator->read_header_init();
+        if (result) {
+                printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the "
+                                "image header.\n");
+                goto out_remove_image;
+        }
+
+        /* Check for noresume command line option */
+        if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
+                printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
+                                "image.\n");
+                goto out_remove_image;
+        }
+
+        /* Check whether we've resumed before */
+        if (test_toi_state(TOI_RESUMED_BEFORE)) {
+                toi_early_boot_message(1, 0, NULL);
+                if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+                        printk(KERN_INFO "TuxOnIce: Tried to resume before: "
+                                        "Invalidated image.\n");
+                        goto out_remove_image;
+                }
+        }
+
+        clear_toi_state(TOI_CONTINUE_REQ);
+
+        toi_image_header_version = toiActiveAllocator->get_header_version();
+
+        if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) {
+                toi_early_boot_message(1, 0, image_version_error);
+                if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+                        printk(KERN_INFO "TuxOnIce: Header version too new: "
+                                        "Invalidated image.\n");
+                        goto out_remove_image;
+                }
+        }
+
+        /* Read hibernate header */
+        result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+                        header_buffer, sizeof(struct toi_header));
+        if (result < 0) {
+                printk(KERN_ERR "TuxOnIce: Failed to read the image "
+                                "signature.\n");
+                goto out_remove_image;
+        }
+
+        toi_header = (struct toi_header *) header_buffer;
+
+        /*
+         * NB: This call may also result in a reboot rather than returning.
+         */
+
+        sanity_error = sanity_check(toi_header);
+        if (sanity_error) {
+                toi_early_boot_message(1, TOI_CONTINUE_REQ,
+                                sanity_error);
+                printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
+                goto out_remove_image;
+        }
+
+        /*
+         * We have an image and it looks like it will load okay.
+         *
+         * Get metadata from header. Don't override commandline parameters.
+         *
+         * We don't need to save the image size limit because it's not used
+         * during resume and will be restored with the image anyway.
+         */
+
+        memcpy((char *) &pagedir1,
+                (char *) &toi_header->pagedir, sizeof(pagedir1));
+        toi_result = toi_header->param0;
+        if (!toi_bkd.toi_debug_state) {
+                toi_bkd.toi_action =
+                        (toi_header->param1 & ~toi_bootflags_mask) |
+                        (toi_bkd.toi_action & toi_bootflags_mask);
+                toi_bkd.toi_debug_state = toi_header->param2;
+                toi_bkd.toi_default_console_level = toi_header->param3;
+        }
+        clear_toi_state(TOI_IGNORE_LOGLEVEL);
+        pagedir2.size = toi_header->pageset_2_size;
+        for (i = 0; i < 4; i++)
+                toi_bkd.toi_io_time[i/2][i%2] =
+                        toi_header->io_time[i/2][i%2];
+
+        set_toi_state(TOI_BOOT_KERNEL);
+        boot_kernel_data_buffer = toi_header->bkd;
+
+        read_if_version(1, toi_max_workers, "TuxOnIce max workers",
+                        goto out_remove_image);
+
+        /* Read filesystem info */
+        if (fs_info_load_and_check()) {
+                printk(KERN_EMERG "TuxOnIce: File system mount time checks "
+                        "failed. Refusing to corrupt your filesystems!\n");
+                goto out_remove_image;
+        }
+
+        /* Read module configurations */
+        result = read_module_configs();
+        if (result) {
+                pagedir1.size = 0;
+                pagedir2.size = 0;
+                printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
+                                "configurations.\n");
+                clear_action_state(TOI_KEEP_IMAGE);
+                goto out_remove_image;
+        }
+
+        toi_prepare_console();
+
+        set_toi_state(TOI_NOW_RESUMING);
+
+        result = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+        if (result)
+                goto out_notifier_call_chain;
+
+        if (usermodehelper_disable())
+                goto out_enable_usermodehelper;
+
+        current->flags |= PF_NOFREEZE;
+        freeze_result = FREEZE_IN_PROGRESS;
+
+        schedule_work_on(cpumask_first(cpu_online_mask), &freeze_work);
+
+        toi_cond_pause(1, "About to read original pageset1 locations.");
+
+        /*
+         * See _toi_rw_header_chunk in tuxonice_bio.c:
+         * Initialize pageset1_map by reading the map from the image.
+         */
+        if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
+                goto out_thaw;
+
+        /*
+         * See toi_rw_cleanup in tuxonice_bio.c:
+         * Clean up after reading the header.
+         */
+        result = toiActiveAllocator->read_header_cleanup();
+        if (result) {
+                printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the "
+                                "image header.\n");
+                goto out_thaw;
+        }
+
+        toi_cond_pause(1, "About to read pagedir.");
+
+        /*
+         * Get the addresses of pages into which we will load the kernel to
+         * be copied back and check if they conflict with the ones we are using.
+         */
+        if (toi_get_pageset1_load_addresses()) {
+                printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
+                                "pageset1.\n");
+                goto out_thaw;
+        }
+
+        /* Read the original kernel back */
+        toi_cond_pause(1, "About to read pageset 1.");
+
+        /* Given the pagemap, read back the data from disk */
+        if (read_pageset(&pagedir1, 0)) {
+                toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
+                result = -EIO;
+                goto out_thaw;
+        }
+
+        toi_cond_pause(1, "About to restore original kernel.");
+        result = 0;
+
+        if (!toi_keeping_image &&
+            toiActiveAllocator->mark_resume_attempted)
+                toiActiveAllocator->mark_resume_attempted(1);
+
+        wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+out:
+        current->flags &= ~PF_NOFREEZE;
+        toi_free_page(25, (unsigned long) header_buffer);
+        return result;
+
+out_thaw:
+        wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+        trap_non_toi_io = 0;
+        thaw_processes();
+out_enable_usermodehelper:
+        usermodehelper_enable();
+out_notifier_call_chain:
+        pm_notifier_call_chain(PM_POST_RESTORE);
+        toi_cleanup_console();
+out_remove_image:
+        result = -EINVAL;
+        if (!toi_keeping_image)
+                toiActiveAllocator->remove_image();
+        toiActiveAllocator->read_header_cleanup();
+        noresume_reset_modules();
+        goto out;
+}
+
+/**
+ * read_pageset1 - highlevel function to read the saved pages
+ *
+ * Attempt to read the header and pageset1 of a hibernate image.
+ * Handle the outcome, complaining where appropriate.
+ **/
+int read_pageset1(void)
+{
+        int error;
+
+        error = __read_pageset1();
+
+        if (error && error != -ENODATA && error != -EINVAL &&
+                                        !test_result_state(TOI_ABORTED))
+                abort_hibernate(TOI_IMAGE_ERROR,
+                        "TuxOnIce: Error %d resuming\n", error);
+
+        return error;
+}
+
+/**
+ * get_have_image_data - check the image header
+ **/
+static char *get_have_image_data(void)
+{
+        char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
+        struct toi_header *toi_header;
+
+        if (!output_buffer) {
+                printk(KERN_INFO "Failed to allocate a page for image header data.\n");
+                return NULL;
+        }
+
+        /* Check for an image */
+        if (!toiActiveAllocator->image_exists(1) ||
+            toiActiveAllocator->read_header_init() ||
+            toiActiveAllocator->rw_header_chunk(READ, NULL,
+                        output_buffer, sizeof(struct toi_header))) {
+                sprintf(output_buffer, "0\n");
+                /*
+                 * From an initrd/ramfs, catting have_image and
+                 * getting a result of 0 is sufficient.
+                 */
+                clear_toi_state(TOI_BOOT_TIME);
+                goto out;
+        }
+
+        toi_header = (struct toi_header *) output_buffer;
+
+        sprintf(output_buffer, "1\n%s\n%s\n",
+                        toi_header->uts.machine,
+                        toi_header->uts.version);
+
+        /* Check whether we've resumed before */
+        if (test_toi_state(TOI_RESUMED_BEFORE))
+                strcat(output_buffer, "Resumed before.\n");
+
+out:
+        noresume_reset_modules();
+        return output_buffer;
+}
+
+/**
+ * read_pageset2 - read second part of the image
+ * @overwrittenpagesonly:        Read only pages which would have been
+ *                                overwritten by pageset1?
+ *
+ * Read in part or all of pageset2 of an image, depending upon
+ * whether we are hibernating and have only overwritten a portion
+ * with pageset1 pages, or are resuming and need to read them
+ * all.
+ *
+ * Returns: Int
+ *        Zero if no error, otherwise the error value.
+ **/
+int read_pageset2(int overwrittenpagesonly)
+{
+        int result = 0;
+
+        if (!pagedir2.size)
+                return 0;
+
+        result = read_pageset(&pagedir2, overwrittenpagesonly);
+
+        toi_cond_pause(1, "Pagedir 2 read.");
+
+        return result;
+}
+
+/**
+ * image_exists_read - has an image been found?
+ * @page:        Output buffer
+ *
+ * Store 0 or 1 in page, depending on whether an image is found.
+ * Incoming buffer is PAGE_SIZE and result is guaranteed
+ * to be far less than that, so we don't worry about
+ * overflow.
+ **/
+int image_exists_read(const char *page, int count)
+{
+        int len = 0;
+        char *result;
+
+        if (toi_activate_storage(0))
+                return count;
+
+        if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+                toi_attempt_to_parse_resume_device(0);
+
+        if (!toiActiveAllocator) {
+                len = sprintf((char *) page, "-1\n");
+        } else {
+                result = get_have_image_data();
+                if (result) {
+                        len = sprintf((char *) page, "%s", result);
+                        toi_free_page(26, (unsigned long) result);
+                }
+        }
+
+        toi_deactivate_storage(0);
+
+        return len;
+}
+
+/**
+ * image_exists_write - invalidate an image if one exists
+ **/
+int image_exists_write(const char *buffer, int count)
+{
+        if (toi_activate_storage(0))
+                return count;
+
+        if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
+                toiActiveAllocator->remove_image();
+
+        toi_deactivate_storage(0);
+
+        clear_result_state(TOI_KEPT_IMAGE);
+
+        return count;
+}
diff -Nur linux-4.2/kernel/power/tuxonice_io.h linux-4.2-pck/kernel/power/tuxonice_io.h
--- linux-4.2/kernel/power/tuxonice_io.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_io.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,72 @@
+/*
+ * kernel/power/tuxonice_io.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/utsname.h>
+#include "tuxonice_pagedir.h"
+
+/* Non-module data saved in our image header */
+struct toi_header {
+        /*
+         * Mirror struct swsusp_info, but without
+         * the page aligned attribute
+         */
+        struct new_utsname uts;
+        u32 version_code;
+        unsigned long num_physpages;
+        int cpus;
+        unsigned long image_pages;
+        unsigned long pages;
+        unsigned long size;
+
+        /* Our own data */
+        unsigned long orig_mem_free;
+        int page_size;
+        int pageset_2_size;
+        int param0;
+        int param1;
+        int param2;
+        int param3;
+        int progress0;
+        int progress1;
+        int progress2;
+        int progress3;
+        int io_time[2][2];
+        struct pagedir pagedir;
+        dev_t root_fs;
+        unsigned long bkd; /* Boot kernel data locn */
+};
+
+extern int write_pageset(struct pagedir *pagedir);
+extern int write_image_header(void);
+extern int read_pageset1(void);
+extern int read_pageset2(int overwrittenpagesonly);
+
+extern int toi_attempt_to_parse_resume_device(int quiet);
+extern void attempt_to_parse_resume_device2(void);
+extern void attempt_to_parse_alt_resume_param(void);
+int image_exists_read(const char *page, int count);
+int image_exists_write(const char *buffer, int count);
+extern void save_restore_alt_param(int replace, int quiet);
+extern atomic_t toi_io_workers;
+
+/* Args to save_restore_alt_param */
+#define RESTORE 0
+#define SAVE 1
+
+#define NOQUIET 0
+#define QUIET 1
+
+extern wait_queue_head_t toi_io_queue_flusher;
+extern int toi_bio_queue_flusher_should_finish;
+
+int fs_info_space_needed(void);
+
+extern int toi_max_workers;
diff -Nur linux-4.2/kernel/power/tuxonice_modules.c linux-4.2-pck/kernel/power/tuxonice_modules.c
--- linux-4.2/kernel/power/tuxonice_modules.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_modules.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,520 @@
+/*
+ * kernel/power/tuxonice_modules.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+
+LIST_HEAD(toi_filters);
+LIST_HEAD(toiAllocators);
+
+LIST_HEAD(toi_modules);
+
+struct toi_module_ops *toiActiveAllocator;
+
+static int toi_num_filters;
+int toiNumAllocators, toi_num_modules;
+
+/*
+ * toi_header_storage_for_modules
+ *
+ * Returns the amount of space needed to store configuration
+ * data needed by the modules prior to copying back the original
+ * kernel. We can exclude data for pageset2 because it will be
+ * available anyway once the kernel is copied back.
+ */
+long toi_header_storage_for_modules(void)
+{
+        struct toi_module_ops *this_module;
+        int bytes = 0;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    (this_module->type == WRITER_MODULE &&
+                     toiActiveAllocator != this_module))
+                        continue;
+                if (this_module->storage_needed) {
+                        int this = this_module->storage_needed() +
+                                sizeof(struct toi_module_header) +
+                                sizeof(int);
+                        this_module->header_requested = this;
+                        bytes += this;
+                }
+        }
+
+        /* One more for the empty terminator */
+        return bytes + sizeof(struct toi_module_header);
+}
+
+void print_toi_header_storage_for_modules(void)
+{
+        struct toi_module_ops *this_module;
+        int bytes = 0;
+
+        printk(KERN_DEBUG "Header storage:\n");
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled ||
+                    (this_module->type == WRITER_MODULE &&
+                     toiActiveAllocator != this_module))
+                        continue;
+                if (this_module->storage_needed) {
+                        int this = this_module->storage_needed() +
+                                sizeof(struct toi_module_header) +
+                                sizeof(int);
+                        this_module->header_requested = this;
+                        bytes += this;
+                        printk(KERN_DEBUG "+ %16s : %-4d/%d.\n",
+                                        this_module->name,
+                                        this_module->header_used, this);
+                }
+        }
+
+        printk(KERN_DEBUG "+ empty terminator : %zu.\n",
+                        sizeof(struct toi_module_header));
+        printk(KERN_DEBUG "                     ====\n");
+        printk(KERN_DEBUG "                     %zu\n",
+                        bytes + sizeof(struct toi_module_header));
+}
+
+/*
+ * toi_memory_for_modules
+ *
+ * Returns the amount of memory requested by modules for
+ * doing their work during the cycle.
+ */
+
+long toi_memory_for_modules(int print_parts)
+{
+        long bytes = 0, result;
+        struct toi_module_ops *this_module;
+
+        if (print_parts)
+                printk(KERN_INFO "Memory for modules:\n===================\n");
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                int this;
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->memory_needed) {
+                        this = this_module->memory_needed();
+                        if (print_parts)
+                                printk(KERN_INFO "%10d bytes (%5ld pages) for "
+                                                "module '%s'.\n", this,
+                                                DIV_ROUND_UP(this, PAGE_SIZE),
+                                                this_module->name);
+                        bytes += this;
+                }
+        }
+
+        result = DIV_ROUND_UP(bytes, PAGE_SIZE);
+        if (print_parts)
+                printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
+
+        return result;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Returns the compression ratio expected when saving the image.
+ */
+
+int toi_expected_compression_ratio(void)
+{
+        int ratio = 100;
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->expected_compression)
+                        ratio = ratio * this_module->expected_compression()
+                                / 100;
+        }
+
+        return ratio;
+}
+
+/* toi_find_module_given_dir
+ * Functionality :        Return a module (if found), given a pointer
+ *                         to its directory name
+ */
+
+static struct toi_module_ops *toi_find_module_given_dir(char *name)
+{
+        struct toi_module_ops *this_module, *found_module = NULL;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!strcmp(name, this_module->directory)) {
+                        found_module = this_module;
+                        break;
+                }
+        }
+
+        return found_module;
+}
+
+/* toi_find_module_given_name
+ * Functionality :        Return a module (if found), given a pointer
+ *                         to its name
+ */
+
+struct toi_module_ops *toi_find_module_given_name(char *name)
+{
+        struct toi_module_ops *this_module, *found_module = NULL;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!strcmp(name, this_module->name)) {
+                        found_module = this_module;
+                        break;
+                }
+        }
+
+        return found_module;
+}
+
+/*
+ * toi_print_module_debug_info
+ * Functionality   : Get debugging info from modules into a buffer.
+ */
+int toi_print_module_debug_info(char *buffer, int buffer_size)
+{
+        struct toi_module_ops *this_module;
+        int len = 0;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->print_debug_info) {
+                        int result;
+                        result = this_module->print_debug_info(buffer + len,
+                                        buffer_size - len);
+                        len += result;
+                }
+        }
+
+        /* Ensure null terminated */
+        buffer[buffer_size - 1] = '\0';
+
+        return len;
+}
+
+/*
+ * toi_register_module
+ *
+ * Register a module.
+ */
+int toi_register_module(struct toi_module_ops *module)
+{
+        int i;
+        struct kobject *kobj;
+
+        if (!hibernation_available())
+                return -ENODEV;
+
+        module->enabled = 1;
+
+        if (toi_find_module_given_name(module->name)) {
+                printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
+                                " which is already registered.\n",
+                                module->name);
+                return -EBUSY;
+        }
+
+        switch (module->type) {
+        case FILTER_MODULE:
+                list_add_tail(&module->type_list, &toi_filters);
+                toi_num_filters++;
+                break;
+        case WRITER_MODULE:
+                list_add_tail(&module->type_list, &toiAllocators);
+                toiNumAllocators++;
+                break;
+        case MISC_MODULE:
+        case MISC_HIDDEN_MODULE:
+        case BIO_ALLOCATOR_MODULE:
+                break;
+        default:
+                printk(KERN_ERR "Hmmm. Module '%s' has an invalid type."
+                        " It has been ignored.\n", module->name);
+                return -EINVAL;
+        }
+        list_add_tail(&module->module_list, &toi_modules);
+        toi_num_modules++;
+
+        if ((!module->directory && !module->shared_directory) ||
+                        !module->sysfs_data || !module->num_sysfs_entries)
+                return 0;
+
+        /*
+         * Modules may share a directory, but those with shared_dir
+         * set must be loaded (via symbol dependencies) after parents
+         * and unloaded beforehand.
+         */
+        if (module->shared_directory) {
+                struct toi_module_ops *shared =
+                        toi_find_module_given_dir(module->shared_directory);
+                if (!shared) {
+                        printk(KERN_ERR "TuxOnIce: Module %s wants to share "
+                                        "%s's directory but %s isn't loaded.\n",
+                                        module->name, module->shared_directory,
+                                        module->shared_directory);
+                        toi_unregister_module(module);
+                        return -ENODEV;
+                }
+                kobj = shared->dir_kobj;
+        } else {
+                if (!strncmp(module->directory, "[ROOT]", 6))
+                        kobj = tuxonice_kobj;
+                else
+                        kobj = make_toi_sysdir(module->directory);
+        }
+        module->dir_kobj = kobj;
+        for (i = 0; i < module->num_sysfs_entries; i++) {
+                int result = toi_register_sysfs_file(kobj,
+                                &module->sysfs_data[i]);
+                if (result)
+                        return result;
+        }
+        return 0;
+}
+
+/*
+ * toi_unregister_module
+ *
+ * Remove a module.
+ */
+void toi_unregister_module(struct toi_module_ops *module)
+{
+        int i;
+
+        if (module->dir_kobj)
+                for (i = 0; i < module->num_sysfs_entries; i++)
+                        toi_unregister_sysfs_file(module->dir_kobj,
+                                        &module->sysfs_data[i]);
+
+        if (!module->shared_directory && module->directory &&
+                        strncmp(module->directory, "[ROOT]", 6))
+                remove_toi_sysdir(module->dir_kobj);
+
+        switch (module->type) {
+        case FILTER_MODULE:
+                list_del(&module->type_list);
+                toi_num_filters--;
+                break;
+        case WRITER_MODULE:
+                list_del(&module->type_list);
+                toiNumAllocators--;
+                if (toiActiveAllocator == module) {
+                        toiActiveAllocator = NULL;
+                        clear_toi_state(TOI_CAN_RESUME);
+                        clear_toi_state(TOI_CAN_HIBERNATE);
+                }
+                break;
+        case MISC_MODULE:
+        case MISC_HIDDEN_MODULE:
+        case BIO_ALLOCATOR_MODULE:
+                break;
+        default:
+                printk(KERN_ERR "Module '%s' has an invalid type."
+                        " It has been ignored.\n", module->name);
+                return;
+        }
+        list_del(&module->module_list);
+        toi_num_modules--;
+}
+
+/*
+ * toi_move_module_tail
+ *
+ * Rearrange modules when reloading the config.
+ */
+void toi_move_module_tail(struct toi_module_ops *module)
+{
+        switch (module->type) {
+        case FILTER_MODULE:
+                if (toi_num_filters > 1)
+                        list_move_tail(&module->type_list, &toi_filters);
+                break;
+        case WRITER_MODULE:
+                if (toiNumAllocators > 1)
+                        list_move_tail(&module->type_list, &toiAllocators);
+                break;
+        case MISC_MODULE:
+        case MISC_HIDDEN_MODULE:
+        case BIO_ALLOCATOR_MODULE:
+                break;
+        default:
+                printk(KERN_ERR "Module '%s' has an invalid type."
+                        " It has been ignored.\n", module->name);
+                return;
+        }
+        if ((toi_num_filters + toiNumAllocators) > 1)
+                list_move_tail(&module->module_list, &toi_modules);
+}
+
+/*
+ * toi_initialise_modules
+ *
+ * Get ready to do some work!
+ */
+int toi_initialise_modules(int starting_cycle, int early)
+{
+        struct toi_module_ops *this_module;
+        int result;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                this_module->header_requested = 0;
+                this_module->header_used = 0;
+                if (!this_module->enabled)
+                        continue;
+                if (this_module->early != early)
+                        continue;
+                if (this_module->initialise) {
+                        result = this_module->initialise(starting_cycle);
+                        if (result) {
+                                toi_cleanup_modules(starting_cycle);
+                                return result;
+                        }
+                        this_module->initialised = 1;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * toi_cleanup_modules
+ *
+ * Tell modules the work is done.
+ */
+void toi_cleanup_modules(int finishing_cycle)
+{
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (!this_module->enabled || !this_module->initialised)
+                        continue;
+                if (this_module->cleanup)
+                        this_module->cleanup(finishing_cycle);
+                this_module->initialised = 0;
+        }
+}
+
+/*
+ * toi_pre_atomic_restore_modules
+ *
+ * Give modules a chance to prepare before the atomic restore.
+ */
+void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (this_module->enabled && this_module->pre_atomic_restore)
+                        this_module->pre_atomic_restore(bkd);
+        }
+}
+
+/*
+ * toi_post_atomic_restore_modules
+ *
+ * Give modules a chance to run after the atomic restore.
+ */
+void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (this_module->enabled && this_module->post_atomic_restore)
+                        this_module->post_atomic_restore(bkd);
+        }
+}
+
+/*
+ * toi_get_next_filter
+ *
+ * Get the next filter in the pipeline.
+ */
+struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
+{
+        struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
+
+        list_for_each_entry(this_filter, &toi_filters, type_list) {
+                if (!this_filter->enabled)
+                        continue;
+                if ((last_filter == filter_sought) || (!filter_sought))
+                        return this_filter;
+                last_filter = this_filter;
+        }
+
+        return toiActiveAllocator;
+}
+
+/**
+ * toi_print_modules: Printk what support is loaded.
+ */
+void toi_print_modules(void)
+{
+        struct toi_module_ops *this_module;
+        int prev = 0;
+
+        printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                if (this_module->type == MISC_HIDDEN_MODULE)
+                        continue;
+                printk("%s %s%s%s", prev ? "," : "",
+                                this_module->enabled ? "" : "[",
+                                this_module->name,
+                                this_module->enabled ? "" : "]");
+                prev = 1;
+        }
+
+        printk(".\n");
+}
+
+/* toi_get_modules
+ *
+ * Take a reference to modules so they can't go away under us.
+ */
+
+int toi_get_modules(void)
+{
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list) {
+                struct toi_module_ops *this_module2;
+
+                if (try_module_get(this_module->module))
+                        continue;
+
+                /* Failed! Reverse gets and return error */
+                list_for_each_entry(this_module2, &toi_modules,
+                                module_list) {
+                        if (this_module == this_module2)
+                                return -EINVAL;
+                        module_put(this_module2->module);
+                }
+        }
+        return 0;
+}
+
+/* toi_put_modules
+ *
+ * Release our references to modules we used.
+ */
+
+void toi_put_modules(void)
+{
+        struct toi_module_ops *this_module;
+
+        list_for_each_entry(this_module, &toi_modules, module_list)
+                module_put(this_module->module);
+}
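
For orientation, the filter pipeline assembled above is normally walked with
toi_get_next_filter(): passing NULL yields the first enabled filter, passing a
filter yields the next enabled one, and the active allocator is returned once
the filters are exhausted. A minimal sketch of such a walk follows; the
function name is hypothetical and the body is illustrative only, not part of
the patch:

#include "tuxonice_modules.h"

/* Sketch: visit every enabled filter, stopping at the active allocator. */
static void demo_walk_filter_pipeline(void)
{
        struct toi_module_ops *ops = toi_get_next_filter(NULL);

        while (ops && ops != toiActiveAllocator) {
                /* A real caller would hand data to ops->write_page() here. */
                ops = toi_get_next_filter(ops);
        }
}
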
diff -Nur linux-4.2/kernel/power/tuxonice_modules.h linux-4.2-pck/kernel/power/tuxonice_modules.h
--- linux-4.2/kernel/power/tuxonice_modules.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_modules.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,212 @@
+/*
+ * kernel/power/tuxonice_modules.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations for modules. Modules are additions to
+ * TuxOnIce that provide facilities such as image compression or
+ * encryption, backends for storage of the image and user interfaces.
+ *
+ */
+
+#ifndef TOI_MODULES_H
+#define TOI_MODULES_H
+
+/* This is the maximum size we store in the image header for a module name */
+#define TOI_MAX_MODULE_NAME_LENGTH 30
+
+struct toi_boot_kernel_data;
+
+/* Per-module metadata */
+struct toi_module_header {
+        char name[TOI_MAX_MODULE_NAME_LENGTH];
+        int enabled;
+        int type;
+        int index;
+        int data_length;
+        unsigned long signature;
+};
+
+enum {
+        FILTER_MODULE,
+        WRITER_MODULE,
+        BIO_ALLOCATOR_MODULE,
+        MISC_MODULE,
+        MISC_HIDDEN_MODULE,
+};
+
+enum {
+        TOI_ASYNC,
+        TOI_SYNC
+};
+
+enum {
+        TOI_VIRT,
+        TOI_PAGE,
+};
+
+#define TOI_MAP(type, addr) \
+        (type == TOI_PAGE ? kmap(addr) : addr)
+
+#define TOI_UNMAP(type, addr) \
+        do { \
+                if (type == TOI_PAGE) \
+                        kunmap(addr); \
+        } while (0)
+
+struct toi_module_ops {
+        /* Functions common to all modules */
+        int type;
+        char *name;
+        char *directory;
+        char *shared_directory;
+        struct kobject *dir_kobj;
+        struct module *module;
+        int enabled, early, initialised;
+        struct list_head module_list;
+
+        /* List of filters or allocators */
+        struct list_head list, type_list;
+
+        /*
+         * Requirements for memory and storage in
+         * the image header.
+         */
+        int (*memory_needed) (void);
+        int (*storage_needed) (void);
+
+        int header_requested, header_used;
+
+        int (*expected_compression) (void);
+
+        /*
+         * Debug info
+         */
+        int (*print_debug_info) (char *buffer, int size);
+        int (*save_config_info) (char *buffer);
+        void (*load_config_info) (char *buffer, int len);
+
+        /*
+         * Initialise & cleanup - general routines called
+         * at the start and end of a cycle.
+         */
+        int (*initialise) (int starting_cycle);
+        void (*cleanup) (int finishing_cycle);
+
+        void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd);
+        void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd);
+
+        /*
+         * Calls for allocating storage (allocators only).
+         *
+         * Header space is requested separately and cannot fail, but the
+         * reservation is only applied when main storage is allocated.
+         * The header space reservation is thus always set prior to
+         * requesting the allocation of storage - and prior to querying
+         * how much storage is available.
+         */
+
+        unsigned long (*storage_available) (void);
+        void (*reserve_header_space) (unsigned long space_requested);
+        int (*register_storage) (void);
+        int (*allocate_storage) (unsigned long space_requested);
+        unsigned long (*storage_allocated) (void);
+        void (*free_unused_storage) (void);
+
+        /*
+         * Routines used in image I/O.
+         */
+        int (*rw_init) (int rw, int stream_number);
+        int (*rw_cleanup) (int rw);
+        int (*write_page) (unsigned long index, int buf_type, void *buf,
+                        unsigned int buf_size);
+        int (*read_page) (unsigned long *index, int buf_type, void *buf,
+                        unsigned int *buf_size);
+        int (*io_flusher) (int rw);
+
+        /* Reset module if image exists but reading aborted */
+        void (*noresume_reset) (void);
+
+        /* Read and write the metadata */
+        int (*write_header_init) (void);
+        int (*write_header_cleanup) (void);
+
+        int (*read_header_init) (void);
+        int (*read_header_cleanup) (void);
+
+        /* To be called after read_header_init */
+        int (*get_header_version) (void);
+
+        int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
+                        char *buffer_start, int buffer_size);
+
+        int (*rw_header_chunk_noreadahead) (int rw,
+                        struct toi_module_ops *owner, char *buffer_start,
+                        int buffer_size);
+
+        /* Attempt to parse an image location */
+        int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
+
+        /* Throttle I/O according to throughput */
+        void (*update_throughput_throttle) (int jif_index);
+
+        /* Flush outstanding I/O */
+        int (*finish_all_io) (void);
+
+        /* Determine whether image exists that we can restore */
+        int (*image_exists) (int quiet);
+
+        /* Mark the image as having tried to resume */
+        int (*mark_resume_attempted) (int);
+
+        /* Destroy image if one exists */
+        int (*remove_image) (void);
+
+        /* Sysfs Data */
+        struct toi_sysfs_data *sysfs_data;
+        int num_sysfs_entries;
+
+        /* Block I/O allocator */
+        struct toi_bio_allocator_ops *bio_allocator_ops;
+};
+
+extern int toi_num_modules, toiNumAllocators;
+
+extern struct toi_module_ops *toiActiveAllocator;
+extern struct list_head toi_filters, toiAllocators, toi_modules;
+
+extern void toi_prepare_console_modules(void);
+extern void toi_cleanup_console_modules(void);
+
+extern struct toi_module_ops *toi_find_module_given_name(char *name);
+extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
+
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_move_module_tail(struct toi_module_ops *module);
+
+extern long toi_header_storage_for_modules(void);
+extern long toi_memory_for_modules(int print_parts);
+extern void print_toi_header_storage_for_modules(void);
+extern int toi_expected_compression_ratio(void);
+
+extern int toi_print_module_debug_info(char *buffer, int buffer_size);
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_unregister_module(struct toi_module_ops *module);
+
+extern int toi_initialise_modules(int starting_cycle, int early);
+#define toi_initialise_modules_early(starting) \
+        toi_initialise_modules(starting, 1)
+#define toi_initialise_modules_late(starting) \
+        toi_initialise_modules(starting, 0)
+extern void toi_cleanup_modules(int finishing_cycle);
+
+extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+
+extern void toi_print_modules(void);
+
+int toi_get_modules(void);
+void toi_put_modules(void);
+#endif
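
To make the interface above concrete, here is a minimal sketch of how a filter
module might describe itself and register with the framework. The "demo" names
are hypothetical and only the bookkeeping fields used by toi_register_module()
and toi_unregister_module() are shown; a real module would also supply
initialise/cleanup and the I/O callbacks.

#include <linux/module.h>
#include "tuxonice_modules.h"

static struct toi_module_ops demo_filter_ops = {
        .type      = FILTER_MODULE,
        .name      = "demo filter",
        .directory = "demo_filter",
        .module    = THIS_MODULE,
        .enabled   = 1,
};

static int __init demo_filter_load(void)
{
        return toi_register_module(&demo_filter_ops);
}

static void __exit demo_filter_unload(void)
{
        toi_unregister_module(&demo_filter_ops);
}

module_init(demo_filter_load);
module_exit(demo_filter_unload);
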
diff -Nur linux-4.2/kernel/power/tuxonice_netlink.c linux-4.2-pck/kernel/power/tuxonice_netlink.c
--- linux-4.2/kernel/power/tuxonice_netlink.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_netlink.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,324 @@
+/*
+ * kernel/power/tuxonice_netlink.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Functions for communicating with a userspace helper via netlink.
+ */
+
+#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include "tuxonice_netlink.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct user_helper_data *uhd_list;
+
+/*
+ * Refill our pool of SKBs for use in emergencies (e.g., when eating memory and
+ * none can be allocated).
+ */
+static void toi_fill_skb_pool(struct user_helper_data *uhd)
+{
+        while (uhd->pool_level < uhd->pool_limit) {
+                struct sk_buff *new_skb =
+                        alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+                if (!new_skb)
+                        break;
+
+                new_skb->next = uhd->emerg_skbs;
+                uhd->emerg_skbs = new_skb;
+                uhd->pool_level++;
+        }
+}
+
+/*
+ * Try to allocate a single skb. If we can't get one, try to use one from
+ * our pool.
+ */
+static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
+{
+        struct sk_buff *skb =
+                alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+        if (skb)
+                return skb;
+
+        skb = uhd->emerg_skbs;
+        if (skb) {
+                uhd->pool_level--;
+                uhd->emerg_skbs = skb->next;
+                skb->next = NULL;
+        }
+
+        return skb;
+}
+
+void toi_send_netlink_message(struct user_helper_data *uhd,
+                int type, void *params, size_t len)
+{
+        struct sk_buff *skb;
+        struct nlmsghdr *nlh;
+        void *dest;
+        struct task_struct *t;
+
+        if (uhd->pid == -1)
+                return;
+
+        if (uhd->debug)
+                printk(KERN_ERR "toi_send_netlink_message: Send "
+                                "message type %d.\n", type);
+
+        skb = toi_get_skb(uhd);
+        if (!skb) {
+                printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
+                return;
+        }
+
+        nlh = nlmsg_put(skb, 0, uhd->sock_seq, type, len, 0);
+        uhd->sock_seq++;
+
+        dest = NLMSG_DATA(nlh);
+        if (params && len > 0)
+                memcpy(dest, params, len);
+
+        netlink_unicast(uhd->nl, skb, uhd->pid, 0);
+
+        toi_read_lock_tasklist();
+        t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+        if (!t) {
+                toi_read_unlock_tasklist();
+                if (uhd->pid > -1)
+                        printk(KERN_INFO "Hmm. Can't find the userspace task"
+                                " %d.\n", uhd->pid);
+                return;
+        }
+        wake_up_process(t);
+        toi_read_unlock_tasklist();
+
+        yield();
+}
+
+static void send_whether_debugging(struct user_helper_data *uhd)
+{
+        static u8 is_debugging = 1;
+
+        toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
+                        &is_debugging, sizeof(u8));
+}
+
+/*
+ * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
+ * are hibernating.
+ */
+static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
+{
+        struct task_struct *t;
+
+        if (uhd->debug)
+                printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
+
+        toi_read_lock_tasklist();
+        t = find_task_by_pid_ns(pid, &init_pid_ns);
+        if (!t) {
+                toi_read_unlock_tasklist();
+                printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
+                                pid);
+                return -EINVAL;
+        }
+
+        t->flags |= PF_NOFREEZE;
+
+        toi_read_unlock_tasklist();
+        uhd->pid = pid;
+
+        toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
+
+        return 0;
+}
+
+/*
+ * Called when the userspace process has informed us that it's ready to roll.
+ */
+static int nl_ready(struct user_helper_data *uhd, u32 version)
+{
+        if (version != uhd->interface_version) {
+                printk(KERN_INFO "%s userspace process using invalid interface"
+                                " version (%d - kernel wants %d). Trying to "
+                                "continue without it.\n",
+                                uhd->name, version, uhd->interface_version);
+                if (uhd->not_ready)
+                        uhd->not_ready();
+                return -EINVAL;
+        }
+
+        complete(&uhd->wait_for_process);
+
+        return 0;
+}
+
+void toi_netlink_close_complete(struct user_helper_data *uhd)
+{
+        if (uhd->nl) {
+                netlink_kernel_release(uhd->nl);
+                uhd->nl = NULL;
+        }
+
+        while (uhd->emerg_skbs) {
+                struct sk_buff *next = uhd->emerg_skbs->next;
+                kfree_skb(uhd->emerg_skbs);
+                uhd->emerg_skbs = next;
+        }
+
+        uhd->pid = -1;
+}
+
+static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
+                struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+        int type = nlh->nlmsg_type;
+        int *data;
+        int err;
+
+        if (uhd->debug)
+                printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
+                                type);
+
+        /* Let the more specific handler go first. It returns
+         * 1 for valid messages that it doesn't handle. */
+        err = uhd->rcv_msg(skb, nlh);
+        if (err != 1)
+                return err;
+
+        /* Only allow one task to receive NOFREEZE privileges */
+        if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
+                printk(KERN_INFO "Received extra nofreeze me requests.\n");
+                return -EBUSY;
+        }
+
+        data = NLMSG_DATA(nlh);
+
+        switch (type) {
+        case NETLINK_MSG_NOFREEZE_ME:
+                return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
+        case NETLINK_MSG_GET_DEBUGGING:
+                send_whether_debugging(uhd);
+                return 0;
+        case NETLINK_MSG_READY:
+                if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
+                        printk(KERN_INFO "Invalid ready message.\n");
+                        if (uhd->not_ready)
+                                uhd->not_ready();
+                        return -EINVAL;
+                }
+                return nl_ready(uhd, (u32) *data);
+        case NETLINK_MSG_CLEANUP:
+                toi_netlink_close_complete(uhd);
+                return 0;
+        }
+
+        return -EINVAL;
+}
+
+static void toi_user_rcv_skb(struct sk_buff *skb)
+{
+        int err;
+        struct nlmsghdr *nlh;
+        struct user_helper_data *uhd = uhd_list;
+
+        while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
+                uhd = uhd->next;
+
+        if (!uhd)
+                return;
+
+        while (skb->len >= NLMSG_SPACE(0)) {
+                u32 rlen;
+
+                nlh = (struct nlmsghdr *) skb->data;
+                if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+                        return;
+
+                rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+                if (rlen > skb->len)
+                        rlen = skb->len;
+
+                err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
+                if (err)
+                        netlink_ack(skb, nlh, err);
+                else if (nlh->nlmsg_flags & NLM_F_ACK)
+                        netlink_ack(skb, nlh, 0);
+                skb_pull(skb, rlen);
+        }
+}
+
+static int netlink_prepare(struct user_helper_data *uhd)
+{
+        struct netlink_kernel_cfg cfg = {
+                .groups = 0,
+                .input = toi_user_rcv_skb,
+        };
+
+        uhd->next = uhd_list;
+        uhd_list = uhd;
+
+        uhd->sock_seq = 0x42c0ffee;
+        uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, &cfg);
+        if (!uhd->nl) {
+                printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
+                                uhd->name);
+                return -ENOMEM;
+        }
+
+        toi_fill_skb_pool(uhd);
+
+        return 0;
+}
+
+void toi_netlink_close(struct user_helper_data *uhd)
+{
+        struct task_struct *t;
+
+        toi_read_lock_tasklist();
+        t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+        if (t)
+                t->flags &= ~PF_NOFREEZE;
+        toi_read_unlock_tasklist();
+
+        toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
+}
+int toi_netlink_setup(struct user_helper_data *uhd)
+{
+        /* In case userui didn't cleanup properly on us */
+        toi_netlink_close_complete(uhd);
+
+        if (netlink_prepare(uhd) < 0) {
+                printk(KERN_INFO "Netlink prepare failed.\n");
+                return 1;
+        }
+
+        if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
+                                UMH_WAIT_EXEC, uhd->debug) < 0) {
+                printk(KERN_INFO "Launch userspace program failed.\n");
+                toi_netlink_close_complete(uhd);
+                return 1;
+        }
+
+        /* Wait 2 seconds for the userspace process to make contact */
+        wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
+
+        if (uhd->pid == -1) {
+                printk(KERN_INFO "%s: Failed to contact userspace process.\n",
+                                uhd->name);
+                toi_netlink_close_complete(uhd);
+                return 1;
+        }
+
+        return 0;
+}
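
Note the contract on the rcv_msg callback used above: it must return 1 for
valid messages it does not handle itself, so that toi_nl_gen_rcv_msg() can
process the generic ones (NOFREEZE_ME, GET_DEBUGGING, READY, CLEANUP). A
hypothetical handler might look like this; the message value and names are
illustrative assumptions, not part of the patch:

#include "tuxonice_netlink.h"

#define DEMO_MSG_PROGRESS (NETLINK_MSG_BASE + 0x40)     /* hypothetical */

static int demo_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        switch (nlh->nlmsg_type) {
        case DEMO_MSG_PROGRESS:
                /* ...handle the module-specific message... */
                return 0;
        default:
                /* Not ours: let the generic handler have a go. */
                return 1;
        }
}
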
diff -Nur linux-4.2/kernel/power/tuxonice_netlink.h linux-4.2-pck/kernel/power/tuxonice_netlink.h
--- linux-4.2/kernel/power/tuxonice_netlink.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_netlink.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,62 @@
+/*
+ * kernel/power/tuxonice_netlink.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for functions for communicating with a userspace helper
+ * via netlink.
+ */
+
+#include <linux/netlink.h>
+#include <net/sock.h>
+
+#define NETLINK_MSG_BASE 0x10
+
+#define NETLINK_MSG_READY 0x10
+#define NETLINK_MSG_NOFREEZE_ME 0x16
+#define NETLINK_MSG_GET_DEBUGGING 0x19
+#define NETLINK_MSG_CLEANUP 0x24
+#define NETLINK_MSG_NOFREEZE_ACK 0x27
+#define NETLINK_MSG_IS_DEBUGGING 0x28
+
+struct user_helper_data {
+        int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
+        void (*not_ready) (void);
+        struct sock *nl;
+        u32 sock_seq;
+        pid_t pid;
+        char *comm;
+        char program[256];
+        int pool_level;
+        int pool_limit;
+        struct sk_buff *emerg_skbs;
+        int skb_size;
+        int netlink_id;
+        char *name;
+        struct user_helper_data *next;
+        struct completion wait_for_process;
+        u32 interface_version;
+        int must_init;
+        int debug;
+};
+
+#ifdef CONFIG_NET
+int toi_netlink_setup(struct user_helper_data *uhd);
+void toi_netlink_close(struct user_helper_data *uhd);
+void toi_send_netlink_message(struct user_helper_data *uhd,
+                int type, void *params, size_t len);
+void toi_netlink_close_complete(struct user_helper_data *uhd);
+#else
+static inline int toi_netlink_setup(struct user_helper_data *uhd)
+{
+        return 0;
+}
+
+static inline void toi_netlink_close(struct user_helper_data *uhd) { }
+static inline void toi_send_netlink_message(struct user_helper_data *uhd,
+                int type, void *params, size_t len) { }
+static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
+        { }
+#endif
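
As a usage sketch (all values hypothetical), a user-interface module would fill
in a user_helper_data, bring the helper up with toi_netlink_setup(), and then
push messages to it with toi_send_netlink_message(). The completion is assumed
to need initialising by the caller before setup waits on it:

static struct user_helper_data demo_uhd = {
        .name              = "demo helper",
        .program           = "/usr/sbin/demo-helper",   /* hypothetical path */
        .rcv_msg           = demo_rcv_msg,               /* as sketched above */
        .interface_version = 1,
        .netlink_id        = 29,                         /* hypothetical protocol */
        .skb_size          = 128,
        .pool_limit        = 6,
        .pid               = -1,
};

static int demo_helper_start(void)
{
        init_completion(&demo_uhd.wait_for_process);

        if (toi_netlink_setup(&demo_uhd))
                return -ENODEV;

        /* Send the helper a hypothetical module-specific message. */
        toi_send_netlink_message(&demo_uhd, NETLINK_MSG_BASE + 0x41, NULL, 0);
        return 0;
}
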
diff -Nur linux-4.2/kernel/power/tuxonice_pagedir.c linux-4.2-pck/kernel/power/tuxonice_pagedir.c
--- linux-4.2/kernel/power/tuxonice_pagedir.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_pagedir.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,345 @@
+/*
+ * kernel/power/tuxonice_pagedir.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for handling pagesets.
+ * Note that pbes aren't actually stored as such. They're stored as
+ * bitmaps and extents.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
+
+static int ptoi_pfn;
+static struct pbe *this_low_pbe;
+static struct pbe **last_low_pbe_ptr;
+
+void toi_reset_alt_image_pageset2_pfn(void)
+{
+  memory_bm_position_reset(pageset2_map);
+}
+
+static struct page *first_conflicting_page;
+
+/*
+ * free_conflicting_pages - free the chain of pages found to conflict with pageset1.
+ */
+
+static void free_conflicting_pages(void)
+{
+        while (first_conflicting_page) {
+                struct page *next =
+                        *((struct page **) kmap(first_conflicting_page));
+                kunmap(first_conflicting_page);
+                toi__free_page(29, first_conflicting_page);
+                first_conflicting_page = next;
+        }
+}
+
+/* ___toi_get_nonconflicting_page
+ *
+ * Description: Gets order zero pages that won't be overwritten
+ *                while copying the original pages.
+ */
+
+struct page *___toi_get_nonconflicting_page(int can_be_highmem)
+{
+        struct page *page;
+        gfp_t flags = TOI_ATOMIC_GFP;
+        if (can_be_highmem)
+                flags |= __GFP_HIGHMEM;
+
+
+        if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
+                        pageset2_map && ptoi_pfn) {
+                do {
+                        ptoi_pfn = memory_bm_next_pfn(pageset2_map, 0);
+                        if (ptoi_pfn != BM_END_OF_MAP) {
+                                page = pfn_to_page(ptoi_pfn);
+                                if (!PagePageset1(page) &&
+                                    (can_be_highmem || !PageHighMem(page)))
+                                        return page;
+                        }
+                } while (ptoi_pfn);
+        }
+
+        do {
+                page = toi_alloc_page(29, flags | __GFP_ZERO);
+                if (!page) {
+                        printk(KERN_INFO "Failed to get nonconflicting "
+                                        "page.\n");
+                        return NULL;
+                }
+                if (PagePageset1(page)) {
+                        struct page **next = (struct page **) kmap(page);
+                        *next = first_conflicting_page;
+                        first_conflicting_page = page;
+                        kunmap(page);
+                }
+        } while (PagePageset1(page));
+
+        return page;
+}
+
+unsigned long __toi_get_nonconflicting_page(void)
+{
+        struct page *page = ___toi_get_nonconflicting_page(0);
+        return page ? (unsigned long) page_address(page) : 0;
+}
+
+static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
+                int highmem)
+{
+        if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
+                     + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
+                struct page *new_page =
+                        ___toi_get_nonconflicting_page(highmem);
+                if (!new_page)
+                        return ERR_PTR(-ENOMEM);
+                this_pbe = (struct pbe *) kmap(new_page);
+                memset(this_pbe, 0, PAGE_SIZE);
+                *page_ptr = new_page;
+        } else
+                this_pbe++;
+
+        return this_pbe;
+}
+
+/**
+ * toi_get_pageset1_load_addresses - generate pbes for conflicting pages
+ *
+ * We check here that the pagedir and the pages it points to won't
+ * collide with the pages we're later going to restore the loaded
+ * image into.
+ *
+ * Returns:
+ *        Zero on success, -ENOMEM if we couldn't allocate enough
+ *        nonconflicting pages (shouldn't happen).
+ **/
+int toi_get_pageset1_load_addresses(void)
+{
+        int pfn, highallocd = 0, lowallocd = 0;
+        int low_needed = pagedir1.size - get_highmem_size(pagedir1);
+        int high_needed = get_highmem_size(pagedir1);
+        int low_pages_for_highmem = 0;
+        gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
+        struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
+                    *low_pbe_page, *last_low_pbe_page = NULL;
+        struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
+                   *this_high_pbe = NULL;
+        unsigned long orig_low_pfn, orig_high_pfn;
+        int high_pbes_done = 0, low_pbes_done = 0;
+        int low_direct = 0, high_direct = 0, result = 0, i;
+        int high_page = 1, high_offset = 0, low_page = 1, low_offset = 0;
+
+        toi_trace_index++;
+
+        memory_bm_position_reset(pageset1_map);
+        memory_bm_position_reset(pageset1_copy_map);
+
+        last_low_pbe_ptr = &restore_pblist;
+
+        /* First, allocate pages for the start of our pbe lists. */
+        if (high_needed) {
+                high_pbe_page = ___toi_get_nonconflicting_page(1);
+                if (!high_pbe_page) {
+                        result = -ENOMEM;
+                        goto out;
+                }
+                this_high_pbe = (struct pbe *) kmap(high_pbe_page);
+                memset(this_high_pbe, 0, PAGE_SIZE);
+        }
+
+        low_pbe_page = ___toi_get_nonconflicting_page(0);
+        if (!low_pbe_page) {
+                result = -ENOMEM;
+                goto out;
+        }
+        this_low_pbe = (struct pbe *) page_address(low_pbe_page);
+
+        /*
+         * Next, allocate the number of pages we need.
+         */
+
+        i = low_needed + high_needed;
+
+        do {
+                int is_high;
+
+                if (i == low_needed)
+                        flags &= ~__GFP_HIGHMEM;
+
+                page = toi_alloc_page(30, flags);
+                BUG_ON(!page);
+
+                SetPagePageset1Copy(page);
+                is_high = PageHighMem(page);
+
+                if (PagePageset1(page)) {
+                        if (is_high)
+                                high_direct++;
+                        else
+                                low_direct++;
+                } else {
+                        if (is_high)
+                                highallocd++;
+                        else
+                                lowallocd++;
+                }
+        } while (--i);
+
+        high_needed -= high_direct;
+        low_needed -= low_direct;
+
+        /*
+         * Do we need to use some lowmem pages for the copies of highmem
+         * pages?
+         */
+        if (high_needed > highallocd) {
+                low_pages_for_highmem = high_needed - highallocd;
+                high_needed -= low_pages_for_highmem;
+                low_needed += low_pages_for_highmem;
+        }
+
+        /*
+         * Now generate our pbes (which will be used for the atomic restore),
+         * and free unneeded pages.
+         */
+        memory_bm_position_reset(pageset1_copy_map);
+        for (pfn = memory_bm_next_pfn(pageset1_copy_map, 0); pfn != BM_END_OF_MAP;
+                        pfn = memory_bm_next_pfn(pageset1_copy_map, 0)) {
+                int is_high;
+                page = pfn_to_page(pfn);
+                is_high = PageHighMem(page);
+
+                if (PagePageset1(page))
+                        continue;
+
+                /* Nope. We're going to use this page. Add a pbe. */
+                if (is_high || low_pages_for_highmem) {
+                        struct page *orig_page;
+                        high_pbes_done++;
+                        if (!is_high)
+                                low_pages_for_highmem--;
+                        do {
+                                orig_high_pfn = memory_bm_next_pfn(pageset1_map, 0);
+                                BUG_ON(orig_high_pfn == BM_END_OF_MAP);
+                                orig_page = pfn_to_page(orig_high_pfn);
+                        } while (!PageHighMem(orig_page) ||
+                                        PagePageset1Copy(orig_page));
+
+                        this_high_pbe->orig_address = (void *) orig_high_pfn;
+                        this_high_pbe->address = page;
+                        this_high_pbe->next = NULL;
+                        toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "High pbe %d/%d: %p(%d)=>%p",
+                                        high_page, high_offset, page, orig_high_pfn, orig_page);
+                        if (last_high_pbe_page != high_pbe_page) {
+                                *last_high_pbe_ptr =
+                                        (struct pbe *) high_pbe_page;
+                                if (last_high_pbe_page) {
+                                        kunmap(last_high_pbe_page);
+                                        high_page++;
+                                        high_offset = 0;
+                                } else
+                                        high_offset++;
+                                last_high_pbe_page = high_pbe_page;
+                        } else {
+                                *last_high_pbe_ptr = this_high_pbe;
+                                high_offset++;
+                        }
+                        last_high_pbe_ptr = &this_high_pbe->next;
+                        this_high_pbe = get_next_pbe(&high_pbe_page,
+                                        this_high_pbe, 1);
+                        if (IS_ERR(this_high_pbe)) {
+                                printk(KERN_INFO
+                                                "This high pbe is an error.\n");
+                                return -ENOMEM;
+                        }
+                } else {
+                        struct page *orig_page;
+                        low_pbes_done++;
+                        do {
+                                orig_low_pfn = memory_bm_next_pfn(pageset1_map, 0);
+                                BUG_ON(orig_low_pfn == BM_END_OF_MAP);
+                                orig_page = pfn_to_page(orig_low_pfn);
+                        } while (PageHighMem(orig_page) ||
+                                        PagePageset1Copy(orig_page));
+
+                        this_low_pbe->orig_address = page_address(orig_page);
+                        this_low_pbe->address = page_address(page);
+                        this_low_pbe->next = NULL;
+                        toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "Low pbe %d/%d: %p(%d)=>%p",
+                                        low_page, low_offset, this_low_pbe->orig_address,
+                                        orig_low_pfn, this_low_pbe->address);
+                        TOI_TRACE_DEBUG(orig_low_pfn, "LoadAddresses (%d/%d): %p=>%p", low_page, low_offset, this_low_pbe->orig_address, this_low_pbe->address);
+                        *last_low_pbe_ptr = this_low_pbe;
+                        last_low_pbe_ptr = &this_low_pbe->next;
+                        this_low_pbe = get_next_pbe(&low_pbe_page,
+                                        this_low_pbe, 0);
+                        if (low_pbe_page != last_low_pbe_page) {
+                                if (last_low_pbe_page) {
+                                        low_page++;
+                                        low_offset = 0;
+                                } else {
+                                        low_offset++;
+                                }
+                                last_low_pbe_page = low_pbe_page;
+                        } else
+                                low_offset++;
+                        if (IS_ERR(this_low_pbe)) {
+                                printk(KERN_INFO "this_low_pbe is an error.\n");
+                                return -ENOMEM;
+                        }
+                }
+        }
+
+        if (high_pbe_page)
+                kunmap(high_pbe_page);
+
+        if (last_high_pbe_page != high_pbe_page) {
+                if (last_high_pbe_page)
+                        kunmap(last_high_pbe_page);
+                toi__free_page(29, high_pbe_page);
+        }
+
+        free_conflicting_pages();
+
+out:
+        return result;
+}
+
+int add_boot_kernel_data_pbe(void)
+{
+        this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
+        if (!this_low_pbe->address) {
+                printk(KERN_INFO "Failed to get bkd atomic restore buffer.\n");
+                return -ENOMEM;
+        }
+
+        toi_bkd.size = sizeof(toi_bkd);
+        memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
+
+        *last_low_pbe_ptr = this_low_pbe;
+        this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
+        this_low_pbe->next = NULL;
+        return 0;
+}
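
The chains built above hang off restore_pblist (lowmem) and
restore_highmem_pblist (highmem) and are consumed during the atomic restore.
Conceptually the lowmem side amounts to the loop below; this is a sketch of the
idea only, since the real copy is performed by the atomic-restore code with
interrupts off:

struct pbe *pbe;

for (pbe = restore_pblist; pbe; pbe = pbe->next)
        copy_page(pbe->orig_address, pbe->address);
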
diff -Nur linux-4.2/kernel/power/tuxonice_pagedir.h linux-4.2-pck/kernel/power/tuxonice_pagedir.h
--- linux-4.2/kernel/power/tuxonice_pagedir.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_pagedir.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,50 @@
+/*
+ * kernel/power/tuxonice_pagedir.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for routines for handling pagesets.
+ */
+
+#ifndef KERNEL_POWER_PAGEDIR_H
+#define KERNEL_POWER_PAGEDIR_H
+
+/* Pagedir
+ *
+ * Contains the metadata for a set of pages saved in the image.
+ */
+
+struct pagedir {
+        int id;
+        unsigned long size;
+#ifdef CONFIG_HIGHMEM
+        unsigned long size_high;
+#endif
+};
+
+#ifdef CONFIG_HIGHMEM
+#define get_highmem_size(pagedir) (pagedir.size_high)
+#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
+#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
+#else
+#define get_highmem_size(pagedir) (0)
+#define set_highmem_size(pagedir, sz) do { } while (0)
+#define inc_highmem_size(pagedir) do { } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size)
+#endif
+
+extern struct pagedir pagedir1, pagedir2;
+
+extern void toi_copy_pageset1(void);
+
+extern int toi_get_pageset1_load_addresses(void);
+
+extern unsigned long __toi_get_nonconflicting_page(void);
+struct page *___toi_get_nonconflicting_page(int can_be_highmem);
+
+extern void toi_reset_alt_image_pageset2_pfn(void);
+extern int add_boot_kernel_data_pbe(void);
+#endif
diff -Nur linux-4.2/kernel/power/tuxonice_pageflags.c linux-4.2-pck/kernel/power/tuxonice_pageflags.c
--- linux-4.2/kernel/power/tuxonice_pageflags.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_pageflags.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_pageflags.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for serialising and relocating pageflags in which we
+ * store our image metadata.
+ */
+
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+int toi_pageflags_space_needed(void)
+{
+        return memory_bm_space_needed(pageset1_map);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_pageflags.h linux-4.2-pck/kernel/power/tuxonice_pageflags.h
--- linux-4.2/kernel/power/tuxonice_pageflags.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_pageflags.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,106 @@
+/*
+ * kernel/power/tuxonice_pageflags.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+
+struct  memory_bitmap;
+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+void memory_bm_clear(struct memory_bitmap *bm);
+
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index);
+void memory_bm_position_reset(struct memory_bitmap *bm);
+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+int toi_alloc_bitmap(struct memory_bitmap **bm);
+void toi_free_bitmap(struct memory_bitmap **bm);
+void memory_bm_clear(struct memory_bitmap *bm);
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_clear_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+
+struct toi_module_ops;
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+        (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+        (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+int memory_bm_space_needed(struct memory_bitmap *bm);
+
+extern struct memory_bitmap *pageset1_map;
+extern struct memory_bitmap *pageset1_copy_map;
+extern struct memory_bitmap *pageset2_map;
+extern struct memory_bitmap *page_resave_map;
+extern struct memory_bitmap *io_map;
+extern struct memory_bitmap *nosave_map;
+extern struct memory_bitmap *free_map;
+extern struct memory_bitmap *compare_map;
+
+#define PagePageset1(page) \
+        (pageset1_map && memory_bm_test_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1(page) \
+        (memory_bm_set_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1(page) \
+        (memory_bm_clear_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset1Copy(page) \
+        (memory_bm_test_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1Copy(page) \
+        (memory_bm_set_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1Copy(page) \
+        (memory_bm_clear_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset2(page) \
+        (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset2(page) \
+        (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset2(page) \
+        (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageWasRW(page) \
+        (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPageWasRW(page) \
+        (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageWasRW(page) \
+        (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageResave(page) (page_resave_map ? \
+        memory_bm_test_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageResave(page) \
+        (memory_bm_set_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageResave(page) \
+        (memory_bm_clear_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosave(page) (nosave_map ? \
+        memory_bm_test_bit(nosave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosave(page) \
+        (mem_bm_set_bit_check(nosave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosave(page) \
+        (memory_bm_clear_bit(nosave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosaveFree(page) (free_map ? \
+                memory_bm_test_bit(free_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosaveFree(page) \
+        (memory_bm_set_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosaveFree(page) \
+        (memory_bm_clear_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageCompareChanged(page) (compare_map ? \
+                memory_bm_test_bit(compare_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageCompareChanged(page) \
+        (memory_bm_set_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageCompareChanged(page) \
+        (memory_bm_clear_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+
+extern void save_pageflags(struct memory_bitmap *pagemap);
+extern int load_pageflags(struct memory_bitmap *pagemap);
+extern int toi_pageflags_space_needed(void);
+#endif
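
The per-pageset macros above all sit on top of the memory_bitmap API. The usual
pattern elsewhere in this code for walking a bitmap is to reset its position
and then pull pfns until BM_END_OF_MAP; for example (illustrative only):

unsigned long pfn;

memory_bm_position_reset(pageset2_map);
for (pfn = memory_bm_next_pfn(pageset2_map, 0); pfn != BM_END_OF_MAP;
     pfn = memory_bm_next_pfn(pageset2_map, 0)) {
        struct page *page = pfn_to_page(pfn);

        /* ...operate on each page flagged for pageset2... */
}
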
diff -Nur linux-4.2/kernel/power/tuxonice_power_off.c linux-4.2-pck/kernel/power/tuxonice_power_off.c
--- linux-4.2/kernel/power/tuxonice_power_off.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_power_off.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,286 @@
+/*
+ * kernel/power/tuxonice_power_off.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+#include <linux/device.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/fs.h>
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+
+unsigned long toi_poweroff_method; /* 0 - Kernel power off */
+
+static int wake_delay;
+static char lid_state_file[256], wake_alarm_dir[256];
+static struct file *lid_file, *alarm_file, *epoch_file;
+static int post_wake_state = -1;
+
+static int did_suspend_to_both;
+
+/*
+ * __toi_power_down
+ * Functionality   : Powers down or reboots the computer once the image
+ *                   has been written to disk.
+ * Key Assumptions : Able to reboot/power down via code called or that
+ *                   the warning emitted if the calls fail will be visible
+ *                   to the user (i.e., printk resumes devices).
+ */
+
+static void __toi_power_down(int method)
+{
+        int error;
+
+        toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
+                        "Powering down.");
+
+        if (test_result_state(TOI_ABORTED))
+                goto out;
+
+        if (test_action_state(TOI_REBOOT))
+                kernel_restart(NULL);
+
+        switch (method) {
+        case 0:
+                break;
+        case 3:
+                /*
+                 * Re-read the overwritten part of pageset2 to make post-resume
+                 * faster.
+                 */
+                if (read_pageset2(1))
+                        panic("Attempt to reload pagedir 2 failed. "
+                                        "Try rebooting.");
+
+                pm_prepare_console();
+
+                error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+                if (!error) {
+                        pm_restore_gfp_mask();
+                        error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+                        pm_restrict_gfp_mask();
+                        if (!error)
+                                did_suspend_to_both = 1;
+                }
+                pm_notifier_call_chain(PM_POST_SUSPEND);
+                pm_restore_console();
+
+                /* Success - we're now post-resume-from-ram */
+                if (did_suspend_to_both)
+                        return;
+
+                /* Failed to suspend to ram - do normal power off */
+                break;
+        case 4:
+                /*
+                 * If succeeds, doesn't return. If fails, do a simple
+                 * powerdown.
+                 */
+                hibernation_platform_enter();
+                break;
+        case 5:
+                /* Historic entry only now */
+                break;
+        }
+
+        if (method && method != 5)
+                toi_cond_pause(1,
+                        "Falling back to alternate power off method.");
+
+        if (test_result_state(TOI_ABORTED))
+                goto out;
+
+        if (pm_power_off)
+                kernel_power_off();
+        kernel_halt();
+        toi_cond_pause(1, "Powerdown failed.");
+        while (1)
+                cpu_relax();
+
+out:
+        if (read_pageset2(1))
+                panic("Attempt to reload pagedir 2 failed. Try rebooting.");
+        return;
+}
+
+#define CLOSE_FILE(file) \
+        if (file) { \
+                filp_close(file, NULL); file = NULL; \
+        }
+
+static void powerdown_cleanup(int toi_or_resume)
+{
+        if (!toi_or_resume)
+                return;
+
+        CLOSE_FILE(lid_file);
+        CLOSE_FILE(alarm_file);
+        CLOSE_FILE(epoch_file);
+}
+
+static void open_file(char *format, char *arg, struct file **var, int mode,
+                char *desc)
+{
+        char buf[256];
+
+        if (strlen(arg)) {
+                sprintf(buf, format, arg);
+                *var = filp_open(buf, mode, 0);
+                if (IS_ERR(*var) || !*var) {
+                        printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
+                                desc, buf, *var);
+                        *var = NULL;
+                }
+        }
+}
+
+static int powerdown_init(int toi_or_resume)
+{
+        if (!toi_or_resume)
+                return 0;
+
+        did_suspend_to_both = 0;
+
+        open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
+                        O_RDONLY, "lid");
+
+        if (strlen(wake_alarm_dir)) {
+                open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
+                                &alarm_file, O_WRONLY, "alarm");
+
+                open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
+                                &epoch_file, O_RDONLY, "epoch");
+        }
+
+        return 0;
+}
+
+static int lid_closed(void)
+{
+        char array[25];
+        ssize_t size;
+        loff_t pos = 0;
+
+        if (!lid_file)
+                return 0;
+
+        size = vfs_read(lid_file, (char __user *) array, 25, &pos);
+        if ((int) size < 1) {
+                printk(KERN_INFO "Failed to read lid state file (%d).\n",
+                        (int) size);
+                return 0;
+        }
+
+        if (!strcmp(array, "state:      closed\n"))
+                return 1;
+
+        return 0;
+}
+
+static void write_alarm_file(int value)
+{
+        ssize_t size;
+        char buf[40];
+        loff_t pos = 0;
+
+        if (!alarm_file)
+                return;
+
+        sprintf(buf, "%d\n", value);
+
+        size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
+
+        if (size < 0)
+                printk(KERN_INFO "Error %d writing alarm value %s.\n",
+                                (int) size, buf);
+}
+
+/**
+ * toi_check_resleep: See whether to powerdown again after waking.
+ *
+ * After waking, check whether we should powerdown again in a (usually
+ * different) way. We only do this if the lid switch is still closed.
+ */
+void toi_check_resleep(void)
+{
+        /* We only get here if we suspended to ram and woke. */
+        if (lid_closed() && post_wake_state >= 0)
+                __toi_power_down(post_wake_state);
+}
+
+void toi_power_down(void)
+{
+        if (alarm_file && wake_delay) {
+                char array[25];
+                loff_t pos = 0;
+                size_t size = vfs_read(epoch_file, (char __user *) array, 25,
+                                &pos);
+
+                if (((int) size) < 1)
+                        printk(KERN_INFO "Failed to read epoch file (%d).\n",
+                                        (int) size);
+                else {
+                        unsigned long since_epoch;
+                        if (!kstrtoul(array, 0, &since_epoch)) {
+                                /* Clear any wakeup time. */
+                                write_alarm_file(0);
+
+                                /* Set new wakeup time. */
+                                write_alarm_file(since_epoch + wake_delay);
+                        }
+                }
+        }
+
+        __toi_power_down(toi_poweroff_method);
+
+        toi_check_resleep();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_ACPI)
+        SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
+        SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
+        SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
+        SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
+                        NULL),
+        SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
+        SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
+                0, 0, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops powerdown_ops = {
+        .type                                = MISC_HIDDEN_MODULE,
+        .name                                = "poweroff",
+        .initialise                        = powerdown_init,
+        .cleanup                        = powerdown_cleanup,
+        .directory                        = "[ROOT]",
+        .module                                = THIS_MODULE,
+        .sysfs_data                        = sysfs_params,
+        .num_sysfs_entries                = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+int toi_poweroff_init(void)
+{
+        return toi_register_module(&powerdown_ops);
+}
+
+void toi_poweroff_exit(void)
+{
+        toi_unregister_module(&powerdown_ops);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_power_off.h linux-4.2-pck/kernel/power/tuxonice_power_off.h
--- linux-4.2/kernel/power/tuxonice_power_off.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_power_off.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,24 @@
+/*
+ * kernel/power/tuxonice_power_off.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+int toi_pm_state_finish(void);
+void toi_power_down(void);
+extern unsigned long toi_poweroff_method;
+int toi_poweroff_init(void);
+void toi_poweroff_exit(void);
+void toi_check_resleep(void);
+
+extern int platform_begin(int platform_mode);
+extern int platform_pre_snapshot(int platform_mode);
+extern void platform_leave(int platform_mode);
+extern void platform_end(int platform_mode);
+extern void platform_finish(int platform_mode);
+extern int platform_pre_restore(int platform_mode);
+extern void platform_restore_cleanup(int platform_mode);
diff -Nur linux-4.2/kernel/power/tuxonice_prepare_image.c linux-4.2-pck/kernel/power/tuxonice_prepare_image.c
--- linux-4.2/kernel/power/tuxonice_prepare_image.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_prepare_image.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,1080 @@
+/*
+ * kernel/power/tuxonice_prepare_image.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * We need to eat memory until we can:
+ * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
+ * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
+ *    main_storage_needed())
+ * 3. Reload the pagedir and pageset1 to places that don't collide with their
+ *    final destinations, not knowing to what extent the resumed kernel will
+ *    overlap with the one loaded at boot time. I think the resumed kernel
+ *    should overlap completely, but I don't want to rely on this as it is
+ *    an unproven assumption. We therefore assume there will be no overlap at
+ *    all (worst case).
+ * 4. Meet the user's requested limit (if any) on the size of the image.
+ *    The limit is in MB, so pages/256 (assuming 4K pages).
+ *
+ */
+
+#include <linux/highmem.h>
+#include <linux/freezer.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/console.h>
+#include <linux/tuxonice.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_builtin.h"
+
+static unsigned long num_nosave, main_storage_allocated, storage_limit,
+            header_storage_needed;
+unsigned long extra_pd1_pages_allowance =
+        CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
+long image_size_limit = CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT;
+static int no_ps2_needed;
+
+struct attention_list {
+        struct task_struct *task;
+        struct attention_list *next;
+};
+
+static struct attention_list *attention_list;
+
+#define PAGESET1 0
+#define PAGESET2 1
+
+void free_attention_list(void)
+{
+        struct attention_list *last = NULL;
+
+        while (attention_list) {
+                last = attention_list;
+                attention_list = attention_list->next;
+                toi_kfree(6, last, sizeof(*last));
+        }
+}
+
+static int build_attention_list(void)
+{
+        int i, task_count = 0;
+        struct task_struct *p;
+        struct attention_list *next;
+
+        /*
+         * Count all processes marked PF_NOFREEZE, plus the current task.
+         */
+        toi_read_lock_tasklist();
+        for_each_process(p)
+                if ((p->flags & PF_NOFREEZE) || p == current)
+                        task_count++;
+        toi_read_unlock_tasklist();
+
+        /*
+         * Allocate attention list structs.
+         */
+        for (i = 0; i < task_count; i++) {
+                struct attention_list *this =
+                        toi_kzalloc(6, sizeof(struct attention_list),
+                                        TOI_WAIT_GFP);
+                if (!this) {
+                        printk(KERN_INFO "Failed to allocate slab for "
+                                        "attention list.\n");
+                        free_attention_list();
+                        return 1;
+                }
+                this->next = NULL;
+                if (attention_list)
+                        this->next = attention_list;
+                attention_list = this;
+        }
+
+        next = attention_list;
+        toi_read_lock_tasklist();
+        for_each_process(p)
+                if ((p->flags & PF_NOFREEZE) || p == current) {
+                        next->task = p;
+                        next = next->next;
+                }
+        toi_read_unlock_tasklist();
+        return 0;
+}
+
+static void pageset2_full(void)
+{
+        struct zone *zone;
+        struct page *page;
+        unsigned long flags;
+        int i;
+
+        toi_trace_index++;
+
+        for_each_populated_zone(zone) {
+                spin_lock_irqsave(&zone->lru_lock, flags);
+                for_each_lru(i) {
+                        if (!zone_page_state(zone, NR_LRU_BASE + i))
+                                continue;
+
+                        list_for_each_entry(page, &zone->lruvec.lists[i], lru) {
+                                struct address_space *mapping;
+
+                                mapping = page_mapping(page);
+                                if (!mapping || !mapping->host ||
+                                    !(mapping->host->i_flags & S_ATOMIC_COPY)) {
+                                    if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+                                        TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 unmodified.");
+                                    } else {
+                                        TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 pageset2_full.");
+                                        SetPagePageset2(page);
+                                    }
+                                }
+                        }
+                }
+                spin_unlock_irqrestore(&zone->lru_lock, flags);
+        }
+}
+
+/*
+ * toi_mark_task_as_pageset
+ * Functionality   : Marks all the saveable pages belonging to a given process
+ *                      as belonging to a particular pageset.
+ */
+
+static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
+{
+        struct vm_area_struct *vma;
+        struct mm_struct *mm;
+
+        mm = t->active_mm;
+
+        if (!mm || !mm->mmap)
+                return;
+
+        toi_trace_index++;
+
+        if (!irqs_disabled())
+                down_read(&mm->mmap_sem);
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                unsigned long posn;
+
+                if (!vma->vm_start ||
+                    vma->vm_flags & VM_PFNMAP)
+                        continue;
+
+                for (posn = vma->vm_start; posn < vma->vm_end;
+                                posn += PAGE_SIZE) {
+                        struct page *page = follow_page(vma, posn, 0);
+                        struct address_space *mapping;
+
+                        if (!page || !pfn_valid(page_to_pfn(page)))
+                                continue;
+
+                        mapping = page_mapping(page);
+                        if (mapping && mapping->host &&
+                            mapping->host->i_flags & S_ATOMIC_COPY && pageset2)
+                                continue;
+
+                        if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+                                TOI_TRACE_DEBUG(page_to_pfn(page), "_Unmodified %d", pageset2 ? 1 : 2);
+                                continue;
+                        }
+
+                        if (pageset2) {
+                                TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 1");
+                                SetPagePageset2(page);
+                        } else {
+                                TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 2");
+                                ClearPagePageset2(page);
+                                SetPagePageset1(page);
+                        }
+                }
+        }
+
+        if (!irqs_disabled())
+                up_read(&mm->mmap_sem);
+}
+
+static void mark_tasks(int pageset)
+{
+        struct task_struct *p;
+
+        toi_read_lock_tasklist();
+        for_each_process(p) {
+                if (!p->mm)
+                        continue;
+
+                if (p->flags & PF_KTHREAD)
+                        continue;
+
+                toi_mark_task_as_pageset(p, pageset);
+        }
+        toi_read_unlock_tasklist();
+
+}
+
+/* toi_mark_pages_for_pageset2
+ *
+ * Description:        Mark unshared pages in processes not needed for hibernate as
+ *                 eligible to be written out in a separate pagedir.
+ *                 HighMem pages are simply marked as pageset2. They won't be
+ *                 needed during hibernate.
+ */
+
+static void toi_mark_pages_for_pageset2(void)
+{
+        struct attention_list *this = attention_list;
+
+        memory_bm_clear(pageset2_map);
+
+        if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
+                return;
+
+        if (test_action_state(TOI_PAGESET2_FULL))
+                pageset2_full();
+        else
+                mark_tasks(PAGESET2);
+
+        /*
+         * Because the tasks in attention_list are ones related to hibernating,
+         * we know that they won't go away under us.
+         */
+
+        while (this) {
+                if (!test_result_state(TOI_ABORTED))
+                        toi_mark_task_as_pageset(this->task, PAGESET1);
+                this = this->next;
+        }
+}
+
+/*
+ * The atomic copy of pageset1 is stored in pageset2 pages.
+ * But if pageset1 is larger (normally only just after boot),
+ * we need to allocate extra pages to store the atomic copy.
+ * The following data struct and functions are used to handle
+ * the allocation and freeing of that memory.
+ */
+
+static unsigned long extra_pages_allocated;
+
+struct extras {
+        struct page *page;
+        int order;
+        struct extras *next;
+};
+
+static struct extras *extras_list;
+
+/* toi_free_extra_pagedir_memory
+ *
+ * Description:        Free previously allocated extra pagedir memory.
+ */
+void toi_free_extra_pagedir_memory(void)
+{
+        /* Free allocated pages */
+        while (extras_list) {
+                struct extras *this = extras_list;
+                int i;
+
+                extras_list = this->next;
+
+                for (i = 0; i < (1 << this->order); i++)
+                        ClearPageNosave(this->page + i);
+
+                toi_free_pages(9, this->page, this->order);
+                toi_kfree(7, this, sizeof(*this));
+        }
+
+        extra_pages_allocated = 0;
+}
+
+/* toi_allocate_extra_pagedir_memory
+ *
+ * Description:        Allocate memory for making the atomic copy of pagedir1 in the
+ *                 case where it is bigger than pagedir2.
+ * Arguments:        int        num_to_alloc: Number of extra pages needed.
+ * Result:        int.         Number of extra pages we now have allocated.
+ */
+static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
+{
+        int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
+        gfp_t flags = TOI_ATOMIC_GFP;
+
+        if (num_to_alloc < 1)
+                return 0;
+
+        order = fls(num_to_alloc);
+        if (order >= MAX_ORDER)
+                order = MAX_ORDER - 1;
+
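+        /*
+         * fls() can give an order whose block size exceeds what is still
+         * needed; the order is trimmed again inside the loop before each
+         * allocation.
+         */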
+        while (num_to_alloc) {
+                struct page *newpage;
+                unsigned long virt;
+                struct extras *extras_entry;
+
+                while ((1 << order) > num_to_alloc)
+                        order--;
+
+                extras_entry = (struct extras *) toi_kzalloc(7,
+                        sizeof(struct extras), TOI_ATOMIC_GFP);
+
+                if (!extras_entry)
+                        return extra_pages_allocated;
+
+                virt = toi_get_free_pages(9, flags, order);
+                while (!virt && order) {
+                        order--;
+                        virt = toi_get_free_pages(9, flags, order);
+                }
+
+                if (!virt) {
+                        toi_kfree(7, extras_entry, sizeof(*extras_entry));
+                        return extra_pages_allocated;
+                }
+
+                newpage = virt_to_page(virt);
+
+                extras_entry->page = newpage;
+                extras_entry->order = order;
+                extras_entry->next = extras_list;
+
+                extras_list = extras_entry;
+
+                for (j = 0; j < (1 << order); j++) {
+                        SetPageNosave(newpage + j);
+                        SetPagePageset1Copy(newpage + j);
+                }
+
+                extra_pages_allocated += (1 << order);
+                num_to_alloc -= (1 << order);
+        }
+
+        return extra_pages_allocated;
+}
+
+/*
+ * real_nr_free_pages: Count free pages (including those on the per-cpu
+ * lists) in the zones whose zone_idx() bits are set in zone_idx_mask;
+ * pass all_zones_mask to count every populated zone.
+ */
+unsigned long real_nr_free_pages(unsigned long zone_idx_mask)
+{
+        struct zone *zone;
+        int result = 0, cpu;
+
+        /* PCP lists */
+        for_each_populated_zone(zone) {
+                if (!(zone_idx_mask & (1 << zone_idx(zone))))
+                        continue;
+
+                for_each_online_cpu(cpu) {
+                        struct per_cpu_pageset *pset =
+                                per_cpu_ptr(zone->pageset, cpu);
+                        struct per_cpu_pages *pcp = &pset->pcp;
+                        result += pcp->count;
+                }
+
+                result += zone_page_state(zone, NR_FREE_PAGES);
+        }
+        return result;
+}
+
+/*
+ * Discover how much extra memory will be required by the drivers
+ * when they're asked to hibernate. We can then ensure that amount
+ * of memory is available when we really want it.
+ */
+static void get_extra_pd1_allowance(void)
+{
+        unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final;
+
+        toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
+
+        if (toi_go_atomic(PMSG_FREEZE, 1))
+                return;
+
+        final = real_nr_free_pages(all_zones_mask);
+        toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0);
+
+        extra_pd1_pages_allowance = (orig_num_free > final) ?
+                orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE :
+                MIN_EXTRA_PAGES_ALLOWANCE;
+}
+
+/*
+ * Amount of storage needed, possibly taking into account the
+ * expected compression ratio and possibly also ignoring our
+ * allowance for extra pages.
+ */
+static unsigned long main_storage_needed(int use_ecr,
+                int ignore_extra_pd1_allow)
+{
+        return (pagedir1.size + pagedir2.size +
+          (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
+         (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
+}
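+/*
+ * Example: with 100000 pages across both pagesets and an expected
+ * compression ratio of 60%, main_storage_needed(1, 1) (which ignores the
+ * extra pageset1 allowance) estimates 100000 * 60 / 100 = 60000 pages.
+ */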
+
+/*
+ * Storage needed for the image header. Calculated in bytes, returned in pages.
+ */
+unsigned long get_header_storage_needed(void)
+{
+        unsigned long bytes = sizeof(struct toi_header) +
+                        toi_header_storage_for_modules() +
+                        toi_pageflags_space_needed() +
+                        fs_info_space_needed();
+
+        return DIV_ROUND_UP(bytes, PAGE_SIZE);
+}
+
+/*
+ * When freeing memory, pages from either pageset might be freed.
+ *
+ * When seeking to free memory to be able to hibernate, for every ps1 page
+ * freed, we need 2 less pages for the atomic copy because there is one less
+ * page to copy and one more page into which data can be copied.
+ *
+ * Freeing ps2 pages saves us nothing directly. No more memory is available
+ * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
+ * that's too much work to figure out.
+ *
+ * => ps1_to_free functions
+ *
+ * Of course if we just want to reduce the image size, because of storage
+ * limitations or an image size limit either ps will do.
+ *
+ * => any_to_free function
+ */
+
+static unsigned long lowpages_usable_for_highmem_copy(void)
+{
+        unsigned long needed = get_lowmem_size(pagedir1) +
+                        extra_pd1_pages_allowance + MIN_FREE_RAM +
+                        toi_memory_for_modules(0),
+                available = get_lowmem_size(pagedir2) +
+                         real_nr_free_low_pages() + extra_pages_allocated;
+
+        return available > needed ? available - needed : 0;
+}
+
+static unsigned long highpages_ps1_to_free(void)
+{
+        unsigned long need = get_highmem_size(pagedir1),
+                      available = get_highmem_size(pagedir2) +
+                              real_nr_free_high_pages() +
+                              lowpages_usable_for_highmem_copy();
+
+        return need > available ? DIV_ROUND_UP(need - available, 2) : 0;
+}
+
+static unsigned long lowpages_ps1_to_free(void)
+{
+        unsigned long needed = get_lowmem_size(pagedir1) +
+                        extra_pd1_pages_allowance + MIN_FREE_RAM +
+                        toi_memory_for_modules(0),
+                available = get_lowmem_size(pagedir2) +
+                         real_nr_free_low_pages() + extra_pages_allocated;
+
+        return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0;
+}
+
+static unsigned long current_image_size(void)
+{
+        return pagedir1.size + pagedir2.size + header_storage_needed;
+}
+
+static unsigned long storage_still_required(void)
+{
+        unsigned long needed = main_storage_needed(1, 1);
+        return needed > storage_limit ? needed - storage_limit : 0;
+}
+
+static unsigned long ram_still_required(void)
+{
+        unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) +
+                2 * extra_pd1_pages_allowance,
+                  available = real_nr_free_low_pages() + extra_pages_allocated;
+        return needed > available ? needed - available : 0;
+}
+
+unsigned long any_to_free(int use_image_size_limit)
+{
+        int use_soft_limit = use_image_size_limit && image_size_limit > 0;
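+        /* image_size_limit is given in MB; << 8 converts it to 4K pages. */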
+        unsigned long current_size = current_image_size(),
+                      soft_limit = use_soft_limit ? (image_size_limit << 8) : 0,
+                      to_free = use_soft_limit ? (current_size > soft_limit ?
+                                      current_size - soft_limit : 0) : 0,
+                      storage_limit = storage_still_required(),
+                      ram_limit = ram_still_required(),
+                      first_max = max(to_free, storage_limit);
+
+        return max(first_max, ram_limit);
+}
+
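+/*
+ * Used with TOI_NO_PS2_IF_UNNEEDED: when this returns false, there appears
+ * to be enough free low memory for the atomic copy without touching
+ * pageset2, so pageset2 need not be written to the image.
+ */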
+static int need_pageset2(void)
+{
+        return (real_nr_free_low_pages() + extra_pages_allocated -
+                2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
+                 toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
+}
+
+/* amount_needed
+ *
+ * Calculates the amount by which the image size needs to be reduced to meet
+ * our constraints.
+ */
+static unsigned long amount_needed(int use_image_size_limit)
+{
+        return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
+                        any_to_free(use_image_size_limit));
+}
+
+static int image_not_ready(int use_image_size_limit)
+{
+        toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+                "Amount still needed (%lu) > 0:%u,"
+                " Storage allocd: %lu < %lu: %u.\n",
+                        amount_needed(use_image_size_limit),
+                        (amount_needed(use_image_size_limit) > 0),
+                        main_storage_allocated,
+                        main_storage_needed(1, 1),
+                        main_storage_allocated < main_storage_needed(1, 1));
+
+        toi_cond_pause(0, NULL);
+
+        return (amount_needed(use_image_size_limit) > 0) ||
+                 main_storage_allocated < main_storage_needed(1, 1);
+}
+
+static void display_failure_reason(int tries_exceeded)
+{
+        unsigned long storage_required = storage_still_required(),
+            ram_required = ram_still_required(),
+            high_ps1 = highpages_ps1_to_free(),
+            low_ps1 = lowpages_ps1_to_free();
+
+        printk(KERN_INFO "Failed to prepare the image because...\n");
+
+        if (!storage_limit) {
+                printk(KERN_INFO "- You need some storage available to be "
+                                "able to hibernate.\n");
+                return;
+        }
+
+        if (tries_exceeded)
+                printk(KERN_INFO "- The maximum number of iterations was "
+                                "reached without successfully preparing the "
+                                "image.\n");
+
+        if (storage_required) {
+                printk(KERN_INFO " - We need at least %lu pages of storage "
+                                "(ignoring the header), but only have %lu.\n",
+                                main_storage_needed(1, 1),
+                                main_storage_allocated);
+                set_abort_result(TOI_INSUFFICIENT_STORAGE);
+        }
+
+        if (ram_required) {
+                printk(KERN_INFO " - We need %lu more free pages of low "
+                                "memory.\n", ram_required);
+                printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
+                printk(KERN_INFO "   + Reqd. by modules : %8lu\n",
+                                toi_memory_for_modules(0));
+                printk(KERN_INFO "   + 2 * extra allow  : %8lu\n",
+                                2 * extra_pd1_pages_allowance);
+                printk(KERN_INFO "   - Currently free   : %8lu\n",
+                                real_nr_free_low_pages());
+                printk(KERN_INFO "   - Pages allocd     : %8lu\n",
+                                extra_pages_allocated);
+                printk(KERN_INFO "                      : ========\n");
+                printk(KERN_INFO "     Still needed     : %8lu\n",
+                                ram_required);
+
+                /* Print breakdown of memory needed for modules */
+                toi_memory_for_modules(1);
+                set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+        }
+
+        if (high_ps1) {
+                printk(KERN_INFO "- We need to free %lu highmem pageset 1 "
+                                "pages.\n", high_ps1);
+                set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+        }
+
+        if (low_ps1) {
+                printk(KERN_INFO " - We need to free %lu lowmem pageset 1 "
+                                "pages.\n", low_ps1);
+                set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+        }
+}
+
+static void display_stats(int always, int sub_extra_pd1_allow)
+{
+        char buffer[255];
+        snprintf(buffer, 254,
+                "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). "
+                "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). "
+                "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n",
+
+                /* Free */
+                real_nr_free_pages(all_zones_mask),
+                real_nr_free_low_pages(),
+
+                /* Sets */
+                pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
+                pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
+
+                /* Nosave */
+                num_nosave, extra_pages_allocated,
+                num_nosave - extra_pages_allocated,
+
+                /* Storage */
+                main_storage_allocated,
+                storage_limit,
+                main_storage_needed(1, sub_extra_pd1_allow),
+                main_storage_needed(1, 1),
+
+                /* Needed */
+                lowpages_ps1_to_free(), highpages_ps1_to_free(),
+                any_to_free(1),
+                MIN_FREE_RAM, toi_memory_for_modules(0),
+                extra_pd1_pages_allowance,
+                image_size_limit,
+
+                need_pageset2() ? "yes" : "no");
+
+        if (always)
+                printk("%s", buffer);
+        else
+                toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer);
+}
+
+/* flag_image_pages
+ *
+ * This routine generates our lists of pages to be stored in each
+ * pageset. Since we store the data using extents, and adding new
+ * extents might allocate a new extent page, this routine may well
+ * be called more than once.
+ */
+static void flag_image_pages(int atomic_copy)
+{
+        int num_free = 0, num_unmodified = 0;
+        unsigned long loop;
+        struct zone *zone;
+
+        pagedir1.size = 0;
+        pagedir2.size = 0;
+
+        set_highmem_size(pagedir1, 0);
+        set_highmem_size(pagedir2, 0);
+
+        num_nosave = 0;
+        toi_trace_index++;
+
+        memory_bm_clear(pageset1_map);
+
+        toi_generate_free_page_map();
+
+        /*
+         * Pages not to be saved are marked Nosave irrespective of being
+         * reserved.
+         */
+        for_each_populated_zone(zone) {
+                int highmem = is_highmem(zone);
+
+                for (loop = 0; loop < zone->spanned_pages; loop++) {
+                        unsigned long pfn = zone->zone_start_pfn + loop;
+                        struct page *page;
+                        int chunk_size;
+
+                        if (!pfn_valid(pfn)) {
+                            TOI_TRACE_DEBUG(pfn, "_Flag Invalid");
+                            continue;
+                        }
+
+                        chunk_size = toi_size_of_free_region(zone, pfn);
+                        if (chunk_size) {
+                            unsigned long y;
+                            for (y = pfn; y < pfn + chunk_size; y++) {
+                                page = pfn_to_page(y);
+                                TOI_TRACE_DEBUG(y, "_Flag Free");
+                                ClearPagePageset1(page);
+                                ClearPagePageset2(page);
+                            }
+                            num_free += chunk_size;
+                            loop += chunk_size - 1;
+                            continue;
+                        }
+
+                        page = pfn_to_page(pfn);
+
+                        if (PageNosave(page)) {
+                            char *desc = PagePageset1Copy(page) ? "Pageset1Copy" : "NoSave";
+                            TOI_TRACE_DEBUG(pfn, "_Flag %s", desc);
+                            num_nosave++;
+                            continue;
+                        }
+
+                        page = highmem ? saveable_highmem_page(zone, pfn) :
+                                saveable_page(zone, pfn);
+
+                        if (!page) {
+                                TOI_TRACE_DEBUG(pfn, "_Flag Nosave2");
+                                num_nosave++;
+                                continue;
+                        }
+
+                        if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+                            TOI_TRACE_DEBUG(pfn, "_Unmodified");
+                            num_unmodified++;
+                            continue;
+                        }
+
+                        if (PagePageset2(page)) {
+                                pagedir2.size++;
+                                TOI_TRACE_DEBUG(pfn, "_Flag PS2");
+                                if (PageHighMem(page))
+                                        inc_highmem_size(pagedir2);
+                                else
+                                        SetPagePageset1Copy(page);
+                                if (PageResave(page)) {
+                                        SetPagePageset1(page);
+                                        ClearPagePageset1Copy(page);
+                                        pagedir1.size++;
+                                        if (PageHighMem(page))
+                                                inc_highmem_size(pagedir1);
+                                }
+                        } else {
+                                pagedir1.size++;
+                                TOI_TRACE_DEBUG(pfn, "_Flag PS1");
+                                SetPagePageset1(page);
+                                if (PageHighMem(page))
+                                        inc_highmem_size(pagedir1);
+                        }
+                }
+        }
+
+        if (!atomic_copy)
+                toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
+                        "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
+                                    " + Unmodified (%d) + NumFree (%d) = %d.\n",
+                        pagedir1.size, pagedir2.size, num_nosave, num_unmodified,
+                        num_free, pagedir1.size + pagedir2.size + num_nosave + num_free);
+}
+
+void toi_recalculate_image_contents(int atomic_copy)
+{
+        memory_bm_clear(pageset1_map);
+        if (!atomic_copy) {
+                unsigned long pfn;
+                memory_bm_position_reset(pageset2_map);
+                for (pfn = memory_bm_next_pfn(pageset2_map, 0);
+                                pfn != BM_END_OF_MAP;
+                                pfn = memory_bm_next_pfn(pageset2_map, 0))
+                        ClearPagePageset1Copy(pfn_to_page(pfn));
+                /* Need to call this before getting pageset1_size! */
+                toi_mark_pages_for_pageset2();
+        }
+        memory_bm_position_reset(pageset2_map);
+        flag_image_pages(atomic_copy);
+
+        if (!atomic_copy) {
+                storage_limit = toiActiveAllocator->storage_available();
+                display_stats(0, 0);
+        }
+}
+
+int try_allocate_extra_memory(void)
+{
+        unsigned long wanted = pagedir1.size +  extra_pd1_pages_allowance -
+                get_lowmem_size(pagedir2);
+        if (wanted > extra_pages_allocated) {
+                unsigned long got = toi_allocate_extra_pagedir_memory(wanted);
+                if (got < wanted) {
+                        toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+                                "Wanted %lu extra pages for pageset1, got %lu.\n",
+                                wanted, got);
+                        return 1;
+                }
+        }
+        return 0;
+}
+
+/* update_image
+ *
+ * Allocate [more] memory and storage for the image.
+ */
+static void update_image(int ps2_recalc)
+{
+        int old_header_req;
+        unsigned long seek;
+
+        if (try_allocate_extra_memory())
+                return;
+
+        if (ps2_recalc)
+                goto recalc;
+
+        thaw_kernel_threads();
+
+        /*
+         * Allocate remaining storage space, if possible, up to the
+         * maximum we know we'll need. It's okay to allocate the
+         * maximum if the writer is the swapwriter, but
+         * we don't want to grab all available space on an NFS share.
+         * We therefore ignore the expected compression ratio here,
+         * thereby trying to allocate the maximum image size we could
+         * need (assuming compression doesn't expand the image), but
+         * don't complain if we can't get the full amount we're after.
+         */
+
+        do {
+                int result;
+
+                old_header_req = header_storage_needed;
+                toiActiveAllocator->reserve_header_space(header_storage_needed);
+
+                /* How much storage is free with the reservation applied? */
+                storage_limit = toiActiveAllocator->storage_available();
+                seek = min(storage_limit, main_storage_needed(0, 0));
+
+                result = toiActiveAllocator->allocate_storage(seek);
+                if (result)
+                        printk("Failed to allocate storage (%d).\n", result);
+
+                main_storage_allocated =
+                        toiActiveAllocator->storage_allocated();
+
+                /* Need more header because more storage allocated? */
+                header_storage_needed = get_header_storage_needed();
+
+        } while (header_storage_needed > old_header_req);
+
+        if (freeze_kernel_threads())
+                set_abort_result(TOI_FREEZING_FAILED);
+
+recalc:
+        toi_recalculate_image_contents(0);
+}
+
+/* attempt_to_freeze
+ *
+ * Try to freeze processes.
+ */
+
+static int attempt_to_freeze(void)
+{
+        int result;
+
+        /* Stop processes before checking again */
+        toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
+                        "filesystems.");
+        result = freeze_processes();
+
+        if (result)
+                set_abort_result(TOI_FREEZING_FAILED);
+
+        result = freeze_kernel_threads();
+
+        if (result)
+                set_abort_result(TOI_FREEZING_FAILED);
+
+        return result;
+}
+
+/* eat_memory
+ *
+ * Try to free some memory, either to meet hard or soft constraints on the image
+ * characteristics.
+ *
+ * Hard constraints:
+ * - Pageset1 must be < half of memory;
+ * - We must have enough memory free at resume time to have pageset1
+ *   be able to be loaded in pages that don't conflict with where it has to
+ *   be restored.
+ * Soft constraints:
+ * - User specified image size limit.
+ */
+static void eat_memory(void)
+{
+        unsigned long amount_wanted = 0;
+        int did_eat_memory = 0;
+
+        /*
+         * Note that if we have enough storage space and enough free memory, we
+         * may exit without eating anything. We give up when the last 10
+         * iterations ate no extra pages because we're not going to get much
+         * more anyway, but the few pages we get will take a lot of time.
+         *
+         * We freeze processes before beginning, and then unfreeze them if we
+         * need to eat memory until we think we have enough. If our attempts
+         * to freeze fail, we give up and abort.
+         */
+
+        amount_wanted = amount_needed(1);
+
+        switch (image_size_limit) {
+        case -1: /* Don't eat any memory */
+                if (amount_wanted > 0) {
+                        set_abort_result(TOI_WOULD_EAT_MEMORY);
+                        return;
+                }
+                break;
+        case -2:  /* Free caches only */
+                drop_pagecache();
+                toi_recalculate_image_contents(0);
+                amount_wanted = amount_needed(1);
+                break;
+        default:
+                break;
+        }
+
+        if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
+                        image_size_limit != -1) {
+                unsigned long request = amount_wanted;
+                unsigned long high_req = max(highpages_ps1_to_free(),
+                                any_to_free(1));
+                unsigned long low_req = lowpages_ps1_to_free();
+                unsigned long got = 0;
+
+                toi_prepare_status(CLEAR_BAR,
+                                "Seeking to free %ldMB of memory.",
+                                MB(amount_wanted));
+
+                thaw_kernel_threads();
+
+                /*
+                 * Ask for too many because shrink_memory_mask doesn't
+                 * currently return enough most of the time.
+                 */
+
+                if (low_req)
+                        got = shrink_memory_mask(low_req, GFP_KERNEL);
+                if (high_req)
+                        shrink_memory_mask(high_req - got, GFP_HIGHUSER);
+
+                did_eat_memory = 1;
+
+                toi_recalculate_image_contents(0);
+
+                amount_wanted = amount_needed(1);
+
+                printk(KERN_DEBUG "Asked shrink_memory_mask for %lu low pages &"
+                                " %lu pages from anywhere, got %lu.\n",
+                                low_req, high_req,
+                                request - amount_wanted);
+
+                toi_cond_pause(0, NULL);
+
+                if (freeze_kernel_threads())
+                        set_abort_result(TOI_FREEZING_FAILED);
+        }
+
+        if (did_eat_memory)
+                toi_recalculate_image_contents(0);
+}
+
+/* toi_prepare_image
+ *
+ * Entry point to the whole image preparation section.
+ *
+ * We do four things:
+ * - Freeze processes;
+ * - Ensure image size constraints are met;
+ * - Complete all the preparation for saving the image,
+ *   including allocation of storage. The only memory
+ *   that should be needed when we're finished is that
+ *   for actually storing the image (and we know how
+ *   much is needed for that because the modules tell
+ *   us).
+ * - Make sure that all dirty buffers are written out.
+ */
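+/* Maximum number of eat_memory()/update_image() iterations before giving up. */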
+#define MAX_TRIES 2
+int toi_prepare_image(void)
+{
+        int result = 1, tries = 1;
+
+        main_storage_allocated = 0;
+        no_ps2_needed = 0;
+
+        if (attempt_to_freeze())
+                return 1;
+
+        lock_device_hotplug();
+        set_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+
+        if (!extra_pd1_pages_allowance)
+                get_extra_pd1_allowance();
+
+        storage_limit = toiActiveAllocator->storage_available();
+
+        if (!storage_limit) {
+                printk(KERN_INFO "No storage available. Didn't try to prepare "
+                                "an image.\n");
+                display_failure_reason(0);
+                set_abort_result(TOI_NOSTORAGE_AVAILABLE);
+                return 1;
+        }
+
+        if (build_attention_list()) {
+                abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+                                "Unable to successfully prepare the image.\n");
+                return 1;
+        }
+
+        toi_recalculate_image_contents(0);
+
+        do {
+                toi_prepare_status(CLEAR_BAR,
+                                "Preparing Image. Try %d.", tries);
+
+                eat_memory();
+
+                if (test_result_state(TOI_ABORTED))
+                        break;
+
+                update_image(0);
+
+                tries++;
+
+        } while (image_not_ready(1) && tries <= MAX_TRIES &&
+                        !test_result_state(TOI_ABORTED));
+
+        result = image_not_ready(0);
+
+        /* TODO: Handle case where need to remove existing image and resave
+         * instead of adding to incremental image. */
+
+        if (!test_result_state(TOI_ABORTED)) {
+                if (result) {
+                        display_stats(1, 0);
+                        display_failure_reason(tries > MAX_TRIES);
+                        abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+                                "Unable to successfully prepare the image.\n");
+                } else {
+                        /* Pageset 2 needed? */
+                        if (!need_pageset2() &&
+                                  test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
+                                no_ps2_needed = 1;
+                                toi_recalculate_image_contents(0);
+                                update_image(1);
+                        }
+
+                        toi_cond_pause(1, "Image preparation complete.");
+                }
+        }
+
+        return result ? result : allocate_checksum_pages();
+}
diff -Nur linux-4.2/kernel/power/tuxonice_prepare_image.h linux-4.2-pck/kernel/power/tuxonice_prepare_image.h
--- linux-4.2/kernel/power/tuxonice_prepare_image.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_prepare_image.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,38 @@
+/*
+ * kernel/power/tuxonice_prepare_image.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <asm/sections.h>
+
+extern int toi_prepare_image(void);
+extern void toi_recalculate_image_contents(int atomic_copy);
+extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask);
+extern long image_size_limit;
+extern void toi_free_extra_pagedir_memory(void);
+extern unsigned long extra_pd1_pages_allowance;
+extern void free_attention_list(void);
+
+#define MIN_FREE_RAM 100
+#define MIN_EXTRA_PAGES_ALLOWANCE 500
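+/* Both values above are numbers of pages. */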
+
+#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
+#ifdef CONFIG_HIGHMEM
+#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
+                                                (1 << ZONE_HIGHMEM)))
+#else
+#define real_nr_free_high_pages() (0)
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
+
+/* For eat_memory function */
+#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
+#endif
+
+unsigned long get_header_storage_needed(void);
+unsigned long any_to_free(int use_image_size_limit);
+int try_allocate_extra_memory(void);
diff -Nur linux-4.2/kernel/power/tuxonice_prune.c linux-4.2-pck/kernel/power/tuxonice_prune.c
--- linux-4.2/kernel/power/tuxonice_prune.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_prune.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,406 @@
+/*
+ * kernel/power/tuxonice_prune.c
+ *
+ * Copyright (C) 2012 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file implements a TuxOnIce module that seeks to prune the
+ * amount of data written to disk. It builds a table of hashes
+ * of the uncompressed data, and writes the pfn of the previous page
+ * with the same contents instead of repeating the data when a match
+ * is found.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/hash.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+/*
+ * We never write a page bigger than PAGE_SIZE, so use a large number
+ * to indicate that data is a PFN.
+ */
+#define PRUNE_DATA_IS_PFN (PAGE_SIZE + 100)
+
+static unsigned long toi_pruned_pages;
+
+static struct toi_module_ops toi_prune_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_prune_hash_algo_name[32] = "sha1";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+        struct shash_desc desc;
+        char *digest;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
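+
+/*
+ * Each CPU gets its own hash descriptor and digest buffer so pages can be
+ * hashed concurrently; only the shared statistics are protected by
+ * stats_lock.
+ */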
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_prune_crypto_prepare(void)
+{
+        int cpu, ret, digestsize = 0;
+
+        if (!*toi_prune_hash_algo_name) {
+                printk(KERN_INFO "TuxOnIce: Pruning enabled but no "
+                                "hash algorithm set.\n");
+                return 1;
+        }
+
+        for_each_online_cpu(cpu) {
+                struct cpu_context *this = &per_cpu(contexts, cpu);
+                this->desc.tfm = crypto_alloc_shash(toi_prune_hash_algo_name, 0, 0);
+                if (IS_ERR(this->desc.tfm)) {
+                        printk(KERN_INFO "TuxOnIce: Failed to allocate the "
+                                        "%s prune hash algorithm.\n",
+                                        toi_prune_hash_algo_name);
+                        this->desc.tfm = NULL;
+                        return 1;
+                }
+
+                if (!digestsize)
+                        digestsize = crypto_shash_digestsize(this->desc.tfm);
+
+                this->digest = kmalloc(digestsize, GFP_KERNEL);
+                if (!this->digest) {
+                        printk(KERN_INFO "TuxOnIce: Failed to allocate space "
+                                        "for digest output.\n");
+                        crypto_free_shash(this->desc.tfm);
+                        this->desc.tfm = NULL;
+                        return 1;
+                }
+
+                this->desc.flags = 0;
+
+                ret = crypto_shash_init(&this->desc);
+                if (ret < 0) {
+                        printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+                                        "%s prune hash algorithm.\n",
+                                        toi_prune_hash_algo_name);
+                        kfree(this->digest);
+                        this->digest = NULL;
+                        crypto_free_shash(this->desc.tfm);
+                        this->desc.tfm = NULL;
+                        return 1;
+                }
+        }
+
+        return 0;
+}
+
+static int toi_prune_rw_cleanup(int writing)
+{
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                struct cpu_context *this = &per_cpu(contexts, cpu);
+                if (this->desc.tfm) {
+                        crypto_free_shash(this->desc.tfm);
+                        this->desc.tfm = NULL;
+                }
+
+                if (this->digest) {
+                        kfree(this->digest);
+                        this->digest = NULL;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * toi_prune_init
+ */
+
+static int toi_prune_init(int toi_or_resume)
+{
+        if (!toi_or_resume)
+                return 0;
+
+        toi_pruned_pages = 0;
+
+        next_driver = toi_get_next_filter(&toi_prune_ops);
+
+        return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_prune_rw_init()
+ */
+
+static int toi_prune_rw_init(int rw, int stream_number)
+{
+        if (toi_prune_crypto_prepare()) {
+                printk(KERN_ERR "Failed to initialise prune "
+                                "algorithm.\n");
+                if (rw == READ) {
+                        printk(KERN_INFO "Unable to read the image.\n");
+                        return -ENODEV;
+                } else {
+                        printk(KERN_INFO "Continuing without "
+                                "pruning the image.\n");
+                        toi_prune_ops.enabled = 0;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * toi_prune_write_page()
+ *
+ * Hash a page of data, update the pruning statistics, and pass the page
+ * on to the next module in the pipeline.
+ *
+ * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be checked.
+ *
+ * Returns:        0 on success. Otherwise the error returned by later
+ *                 modules, or -ECHILD if we have a broken pipeline.
+ */
+static int toi_prune_write_page(unsigned long index, int buf_type,
+                void *buffer_page, unsigned int buf_size)
+{
+        int ret = 0, cpu = smp_processor_id();
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+        u8 *output_buffer = buffer_page;
+        int output_len = buf_size;
+        int out_buf_type = buf_type;
+        void *buffer_start;
+
+        if (ctx->desc.tfm) {
+
+                buffer_start = TOI_MAP(buf_type, buffer_page);
+
+                ret = crypto_shash_digest(&ctx->desc, buffer_start, buf_size,
+                                (u8 *) ctx->digest);
+                if (ret) {
+                        printk(KERN_INFO "TuxOnIce: Failed to calculate digest (%d).\n", ret);
+                } else {
+                        mutex_lock(&stats_lock);
+                        toi_pruned_pages++;
+                        mutex_unlock(&stats_lock);
+                }
+
+                TOI_UNMAP(buf_type, buffer_page);
+        }
+
+        /*
+         * Writing a PFN reference (PRUNE_DATA_IS_PFN) in place of duplicate
+         * data is not yet implemented, so the page is always passed through.
+         */
+        ret = next_driver->write_page(index, out_buf_type, output_buffer,
+                        output_len);
+
+        return ret;
+}
+
+/*
+ * toi_prune_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules or from a previously loaded page and
+ * fill the input buffer.
+ * Returns: Zero if successful; otherwise an error from this module or downstream.
+ */
+static int toi_prune_read_page(unsigned long *index, int buf_type,
+                void *buffer_page, unsigned int *buf_size)
+{
+        int ret, cpu = smp_processor_id();
+        unsigned int len;
+        struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+        if (!ctx->desc.tfm)
+                return next_driver->read_page(index, TOI_PAGE, buffer_page,
+                                buf_size);
+
+        /*
+         * All our reads must be synchronous - we can't handle
+         * data that hasn't been read yet.
+         */
+
+        ret = next_driver->read_page(index, buf_type, buffer_page, &len);
+
+        /*
+         * A length of PRUNE_DATA_IS_PFN means the data was replaced by a
+         * reference to an earlier identical page; resolving such references
+         * is not yet implemented, so the page is passed through as read.
+         */
+        *buf_size = len;
+
+        return ret;
+}
+
+/*
+ * toi_prune_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_prune_print_debug_stats(char *buffer, int size)
+{
+        int len;
+
+        /* Output the hash algorithm in use and the number of pages pruned. */
+        if (*toi_prune_hash_algo_name)
+                len = scnprintf(buffer, size, "- Prune hash algorithm is '%s'.\n",
+                                toi_prune_hash_algo_name);
+        else
+                len = scnprintf(buffer, size, "- Prune hash algorithm is not set.\n");
+
+        if (toi_pruned_pages)
+                len += scnprintf(buffer + len, size - len,
+                        "  Pruned %lu pages.\n", toi_pruned_pages);
+        return len;
+}
+
+/*
+ * toi_prune_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Int. Maximum number of bytes of memory required for
+ * operation.
+ */
+static int toi_prune_memory_needed(void)
+{
+        return 2 * PAGE_SIZE;
+}
+
+static int toi_prune_storage_needed(void)
+{
+        return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+                strlen(toi_prune_hash_algo_name) + 1;
+}
+
+/*
+ * toi_prune_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_prune_save_config_info(char *buffer)
+{
+        int len = strlen(toi_prune_hash_algo_name) + 1, offset = 0;
+
+        *((unsigned long *) buffer) = toi_pruned_pages;
+        offset += sizeof(unsigned long);
+        *((int *) (buffer + offset)) = len;
+        offset += sizeof(int);
+        strncpy(buffer + offset, toi_prune_hash_algo_name, len);
+        return offset + len;
+}
+
+/* toi_prune_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for passing back to the
+ * resumed kernel.
+ */
+static void toi_prune_load_config_info(char *buffer, int size)
+{
+        int len, offset = 0;
+
+        toi_pruned_pages = *((unsigned long *) buffer);
+        offset += sizeof(unsigned long);
+        len = *((int *) (buffer + offset));
+        offset += sizeof(int);
+        strncpy(toi_prune_hash_algo_name, buffer + offset, len);
+}
+
+static void toi_prune_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+        bkd->pruned_pages = toi_pruned_pages;
+}
+
+static void toi_prune_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+        toi_pruned_pages = bkd->pruned_pages;
+}
+
+/*
+ * toi_expected_ratio
+ *
+ * Description:        Returns the expected ratio between data passed into this module
+ *                 and the amount of data output when writing.
+ * Returns:        100 - we have no idea how many pages will be pruned.
+ */
+
+static int toi_prune_expected_ratio(void)
+{
+        return 100;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_INT("enabled", SYSFS_RW, &toi_prune_ops.enabled, 0, 1, 0,
+                        NULL),
+        SYSFS_STRING("algorithm", SYSFS_RW, toi_prune_hash_algo_name, 31, 0, NULL),
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_prune_ops = {
+        .type                        = FILTER_MODULE,
+        .name                        = "prune",
+        .directory                = "prune",
+        .module                        = THIS_MODULE,
+        .initialise                = toi_prune_init,
+        .memory_needed                 = toi_prune_memory_needed,
+        .print_debug_info        = toi_prune_print_debug_stats,
+        .save_config_info        = toi_prune_save_config_info,
+        .load_config_info        = toi_prune_load_config_info,
+        .storage_needed                = toi_prune_storage_needed,
+        .expected_compression        = toi_prune_expected_ratio,
+
+        .pre_atomic_restore        = toi_prune_pre_atomic_restore,
+        .post_atomic_restore        = toi_prune_post_atomic_restore,
+
+        .rw_init                = toi_prune_rw_init,
+        .rw_cleanup                = toi_prune_rw_cleanup,
+
+        .write_page                = toi_prune_write_page,
+        .read_page                = toi_prune_read_page,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_prune_load(void)
+{
+        return toi_register_module(&toi_prune_ops);
+}
+
+late_initcall(toi_prune_load);
diff -Nur linux-4.2/kernel/power/tuxonice_storage.c linux-4.2-pck/kernel/power/tuxonice_storage.c
--- linux-4.2/kernel/power/tuxonice_storage.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_storage.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,282 @@
+/*
+ * kernel/power/tuxonice_storage.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for talking to a userspace program that manages storage.
+ *
+ * The kernel side:
+ * - starts the userspace program;
+ * - sends messages telling it when to open and close the connection;
+ * - tells it when to quit;
+ *
+ * The user space side:
+ * - passes messages regarding status;
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_ui.h"
+
+static struct user_helper_data usm_helper_data;
+static struct toi_module_ops usm_ops;
+static int message_received, usm_prepare_count;
+static int storage_manager_last_action, storage_manager_action;
+
+static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+        int type;
+        int *data;
+
+        type = nlh->nlmsg_type;
+
+        /* A control message: ignore it */
+        if (type < NETLINK_MSG_BASE)
+                return 0;
+
+        /* Unknown message: reply with EINVAL */
+        if (type >= USM_MSG_MAX)
+                return -EINVAL;
+
+        /* All operations require privileges, even GET */
+        if (!capable(CAP_NET_ADMIN))
+                return -EPERM;
+
+        /* Only allow one task to receive NOFREEZE privileges */
+        if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
+                return -EBUSY;
+
+        data = (int *) NLMSG_DATA(nlh);
+
+        switch (type) {
+        case USM_MSG_SUCCESS:
+        case USM_MSG_FAILED:
+                message_received = type;
+                complete(&usm_helper_data.wait_for_process);
+                break;
+        default:
+                printk(KERN_INFO "Storage manager doesn't recognise "
+                                "message %d.\n", type);
+        }
+
+        return 1;
+}
+
+#ifdef CONFIG_NET
+static int activations;
+
+int toi_activate_storage(int force)
+{
+        int tries = 1;
+
+        if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+                return 0;
+
+        message_received = 0;
+        activations++;
+
+        if (activations > 1 && !force)
+                return 0;
+
+        while ((!message_received || message_received == USM_MSG_FAILED) &&
+                        tries < 2) {
+                toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
+                                "%d.\n", tries);
+
+                init_completion(&usm_helper_data.wait_for_process);
+
+                toi_send_netlink_message(&usm_helper_data,
+                        USM_MSG_CONNECT,
+                        NULL, 0);
+
+                /* Wait 2 seconds for the userspace process to make contact */
+                wait_for_completion_timeout(&usm_helper_data.wait_for_process,
+                                2*HZ);
+
+                tries++;
+        }
+
+        return 0;
+}
+
+int toi_deactivate_storage(int force)
+{
+        if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+                return 0;
+
+        message_received = 0;
+        activations--;
+
+        if (activations && !force)
+                return 0;
+
+        init_completion(&usm_helper_data.wait_for_process);
+
+        toi_send_netlink_message(&usm_helper_data,
+                        USM_MSG_DISCONNECT,
+                        NULL, 0);
+
+        wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
+
+        if (!message_received || message_received == USM_MSG_FAILED) {
+                printk(KERN_INFO "Returning failure disconnecting storage.\n");
+                return 1;
+        }
+
+        return 0;
+}
+#endif
+
+static void storage_manager_simulate(void)
+{
+        printk(KERN_INFO "--- Storage manager simulate ---\n");
+        toi_prepare_usm();
+        schedule();
+        printk(KERN_INFO "--- Activate storage 1 ---\n");
+        toi_activate_storage(1);
+        schedule();
+        printk(KERN_INFO "--- Deactivate storage 1 ---\n");
+        toi_deactivate_storage(1);
+        schedule();
+        printk(KERN_INFO "--- Cleanup usm ---\n");
+        toi_cleanup_usm();
+        schedule();
+        printk(KERN_INFO "--- Storage manager simulate ends ---\n");
+}
+
+static int usm_storage_needed(void)
+{
+        return sizeof(int) + strlen(usm_helper_data.program) + 1;
+}
+
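+/*
+ * Saved config layout: an int holding the program path length (including
+ * the terminating NUL), followed by the path itself; usm_load_config_info()
+ * reads it back in the same order.
+ */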
+static int usm_save_config_info(char *buf)
+{
+        int len = strlen(usm_helper_data.program);
+
+        *((int *) buf) = len + 1;
+        memcpy(buf + sizeof(int), usm_helper_data.program, len + 1);
+        return sizeof(int) + len + 1;
+}
+
+static void usm_load_config_info(char *buf, int size)
+{
+        /* Don't load the saved path if one has already been set */
+        if (usm_helper_data.program[0])
+                return;
+
+        memcpy(usm_helper_data.program, buf + sizeof(int), *((int *) buf));
+}
+
+static int usm_memory_needed(void)
+{
+        /* ballpark figure of 32 pages */
+        return 32 * PAGE_SIZE;
+}
+
+/* toi_prepare_usm
+ */
+int toi_prepare_usm(void)
+{
+        usm_prepare_count++;
+
+        if (usm_prepare_count > 1 || !usm_ops.enabled)
+                return 0;
+
+        usm_helper_data.pid = -1;
+
+        if (!*usm_helper_data.program)
+                return 0;
+
+        toi_netlink_setup(&usm_helper_data);
+
+        if (usm_helper_data.pid == -1)
+                printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
+                                " start it.\n");
+
+        toi_activate_storage(0);
+
+        return usm_helper_data.pid != -1;
+}
+
+void toi_cleanup_usm(void)
+{
+        usm_prepare_count--;
+
+        if (usm_helper_data.pid > -1 && !usm_prepare_count) {
+                toi_deactivate_storage(0);
+                toi_netlink_close(&usm_helper_data);
+        }
+}
+
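+/* storage_manager_activate
+ *
+ * Write side effect for the activate_storage sysfs entry: prepare or clean
+ * up the storage manager when the requested state changes.
+ */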
+static void storage_manager_activate(void)
+{
+        if (storage_manager_action == storage_manager_last_action)
+                return;
+
+        if (storage_manager_action)
+                toi_prepare_usm();
+        else
+                toi_cleanup_usm();
+
+        storage_manager_last_action = storage_manager_action;
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
+        SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
+        SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
+                NULL),
+        SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1,
+                        0, storage_manager_activate)
+};
+
+static struct toi_module_ops usm_ops = {
+        .type                                = MISC_MODULE,
+        .name                                = "usm",
+        .directory                        = "storage_manager",
+        .module                                = THIS_MODULE,
+        .storage_needed                        = usm_storage_needed,
+        .save_config_info                = usm_save_config_info,
+        .load_config_info                = usm_load_config_info,
+        .memory_needed                        = usm_memory_needed,
+
+        .sysfs_data                        = sysfs_params,
+        .num_sysfs_entries                = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* toi_usm_init
+ * Description: Boot time initialisation for the userspace storage manager.
+ */
+int toi_usm_init(void)
+{
+        usm_helper_data.nl = NULL;
+        usm_helper_data.program[0] = '\0';
+        usm_helper_data.pid = -1;
+        usm_helper_data.skb_size = 0;
+        usm_helper_data.pool_limit = 6;
+        usm_helper_data.netlink_id = NETLINK_TOI_USM;
+        usm_helper_data.name = "userspace storage manager";
+        usm_helper_data.rcv_msg = usm_user_rcv_msg;
+        usm_helper_data.interface_version = 2;
+        usm_helper_data.must_init = 0;
+        init_completion(&usm_helper_data.wait_for_process);
+
+        return toi_register_module(&usm_ops);
+}
+
+void toi_usm_exit(void)
+{
+        toi_netlink_close_complete(&usm_helper_data);
+        toi_unregister_module(&usm_ops);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_storage.h linux-4.2-pck/kernel/power/tuxonice_storage.h
--- linux-4.2/kernel/power/tuxonice_storage.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_storage.h	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_storage.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_NET
+int toi_prepare_usm(void);
+void toi_cleanup_usm(void);
+
+int toi_activate_storage(int force);
+int toi_deactivate_storage(int force);
+extern int toi_usm_init(void);
+extern void toi_usm_exit(void);
+#else
+static inline int toi_usm_init(void) { return 0; }
+static inline void toi_usm_exit(void) { }
+
+static inline int toi_activate_storage(int force)
+{
+        return 0;
+}
+
+static inline int toi_deactivate_storage(int force)
+{
+        return 0;
+}
+
+static inline int toi_prepare_usm(void) { return 0; }
+static inline void toi_cleanup_usm(void) { }
+#endif
+
+enum {
+        USM_MSG_BASE = 0x10,
+
+        /* Kernel -> Userspace */
+        USM_MSG_CONNECT = 0x30,
+        USM_MSG_DISCONNECT = 0x31,
+        USM_MSG_SUCCESS = 0x40,
+        USM_MSG_FAILED = 0x41,
+
+        USM_MSG_MAX,
+};
diff -Nur linux-4.2/kernel/power/tuxonice_swap.c linux-4.2-pck/kernel/power/tuxonice_swap.c
--- linux-4.2/kernel/power/tuxonice_swap.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_swap.c	2015-09-08 11:20:32.470345146 -0300
@@ -0,0 +1,474 @@
+/*
+ * kernel/power/tuxonice_swap.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of swap space as a
+ * backing store.
+ */
+
+#include <linux/suspend.h>
+#include <linux/blkdev.h>
+#include <linux/swapops.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct toi_module_ops toi_swapops;
+
+/* Swapfile to swapon automatically before hibernating and to swapoff
+ * again afterwards, if the user specifies one. */
+static char swapfilename[255] = "";
+static int toi_swapon_status;
+
+/* Swap Pages */
+static unsigned long swap_allocated;
+
+static struct sysinfo swapinfo;
+
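+/* Return 1 if this swap device lives on a RAM disk ("ram*" or "zram*"),
+ * which is useless as a backing store for the hibernation image. */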
+static int is_ram_backed(struct swap_info_struct *si)
+{
+        if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3) ||
+            !strncmp(si->bdev->bd_disk->disk_name, "zram", 4))
+                return 1;
+
+        return 0;
+}
+
+/**
+ * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
+ *
+ * Activate the given swapfile if it wasn't already enabled. Remember whether
+ * we really did swapon it for swapoffing later.
+ */
+static void enable_swapfile(void)
+{
+        int activateswapresult = -EINVAL;
+
+        if (swapfilename[0]) {
+                /* Attempt to swap on with maximum priority */
+                activateswapresult = sys_swapon(swapfilename, 0xFFFF);
+                if (activateswapresult && activateswapresult != -EBUSY)
+                        printk(KERN_ERR "TuxOnIce: The swapfile/partition "
+                                "specified by /sys/power/tuxonice/swap/swapfile"
+                                " (%s) could not be turned on (error %d). "
+                                "Attempting to continue.\n",
+                                swapfilename, activateswapresult);
+                if (!activateswapresult)
+                        toi_swapon_status = 1;
+        }
+}
+
+/**
+ * disable_swapfile: Swapoff any file swaponed at the start of the cycle.
+ *
+ * If we did successfully swapon a file at the start of the cycle, swapoff
+ * it now (finishing up).
+ */
+static void disable_swapfile(void)
+{
+        if (!toi_swapon_status)
+                return;
+
+        sys_swapoff(swapfilename);
+        toi_swapon_status = 0;
+}
+
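+/* Append the contiguous block run [start, end] to the chain's block extent
+ * list, logging it first when bio testing is enabled. */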
+static int add_blocks_to_extent_chain(struct toi_bdev_info *chain,
+                unsigned long start, unsigned long end)
+{
+        if (test_action_state(TOI_TEST_BIO))
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to "
+                                "chain %p.", start << chain->bmap_shift,
+                                end << chain->bmap_shift, chain);
+
+        return toi_add_to_extent_chain(&chain->blocks, start, end);
+}
+
+
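+/* get_main_pool_phys_params
+ *
+ * Convert the chain's allocated swap entries into on-disk block extents:
+ * map each entry to a sector with map_swap_entry() and merge adjacent
+ * sectors into ranges.
+ */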
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+        struct hibernate_extent *extentpointer = NULL;
+        unsigned long address, extent_min = 0, extent_max = 0;
+        int empty = 1;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for "
+                        "chain %d.", chain->allocator_index);
+
+        if (!chain->allocations.first)
+                return 0;
+
+        if (chain->blocks.first)
+                toi_put_extent_chain(&chain->blocks);
+
+        toi_extent_for_each(&chain->allocations, extentpointer, address) {
+                swp_entry_t swap_address = (swp_entry_t) { address };
+                struct block_device *bdev;
+                sector_t new_sector = map_swap_entry(swap_address, &bdev);
+
+                if (empty) {
+                        empty = 0;
+                        extent_min = extent_max = new_sector;
+                        continue;
+                }
+
+                if (new_sector == extent_max + 1) {
+                        extent_max++;
+                        continue;
+                }
+
+                if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+                        printk(KERN_ERR "Out of memory while making block "
+                                        "chains.\n");
+                        return -ENOMEM;
+                }
+
+                extent_min = new_sector;
+                extent_max = new_sector;
+        }
+
+        if (!empty &&
+            add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+                printk(KERN_ERR "Out of memory while making block chains.\n");
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
+/*
+ * Like si_swapinfo, except that we don't include ram backed swap (compcache!)
+ * and don't need to use the spinlocks (userspace is stopped when this
+ * function is called).
+ */
+void si_swapinfo_no_compcache(void)
+{
+        unsigned int i;
+
+        si_swapinfo(&swapinfo);
+        swapinfo.freeswap = 0;
+        swapinfo.totalswap = 0;
+
+        for (i = 0; i < MAX_SWAPFILES; i++) {
+                struct swap_info_struct *si = get_swap_info_struct(i);
+                if (si && (si->flags & SWP_WRITEOK) && !is_ram_backed(si)) {
+                        swapinfo.totalswap += si->inuse_pages;
+                        swapinfo.freeswap += si->pages - si->inuse_pages;
+                }
+        }
+}
+/*
+ * We can't just remember the value from allocation time, because other
+ * processes might have allocated swap in the mean time.
+ */
+static unsigned long toi_swap_storage_available(void)
+{
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available.");
+        si_swapinfo_no_compcache();
+        return swapinfo.freeswap + swap_allocated;
+}
+
+static int toi_swap_initialise(int starting_cycle)
+{
+        if (!starting_cycle)
+                return 0;
+
+        enable_swapfile();
+        return 0;
+}
+
+static void toi_swap_cleanup(int ending_cycle)
+{
+        if (!ending_cycle)
+                return;
+
+        disable_swapfile();
+}
+
+static void toi_swap_free_storage(struct toi_bdev_info *chain)
+{
+        /* Free swap entries */
+        struct hibernate_extent *extentpointer;
+        unsigned long extentvalue;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.",
+                        chain);
+
+        swap_allocated -= chain->allocations.size;
+        toi_extent_for_each(&chain->allocations, extentpointer, extentvalue)
+                swap_free((swp_entry_t) { extentvalue });
+
+        toi_put_extent_chain(&chain->allocations);
+}
+
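+/* Release a contiguous range of swap entries and adjust the running total
+ * of swap pages we have allocated. */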
+static void free_swap_range(unsigned long min, unsigned long max)
+{
+        int j;
+
+        for (j = min; j <= max; j++)
+                swap_free((swp_entry_t) { j });
+        swap_allocated -= (max - min + 1);
+}
+
+/*
+ * Allocation of a single swap type. Swap priorities are handled at the higher
+ * level.
+ */
+static int toi_swap_allocate_storage(struct toi_bdev_info *chain,
+                unsigned long request)
+{
+        unsigned long gotten = 0;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "  Swap allocate storage: Asked to"
+                        " allocate %lu pages from device %d.", request,
+                        chain->allocator_index);
+
+        while (gotten < request) {
+                swp_entry_t start, end;
+                if (0) {
+                    /* Broken at the moment for SSDs */
+                    get_swap_range_of_type(chain->allocator_index, &start, &end,
+                            request - gotten + 1);
+                } else {
+                    start = end = get_swap_page_of_type(chain->allocator_index);
+                }
+                if (start.val) {
+                        int added = end.val - start.val + 1;
+                        if (toi_add_to_extent_chain(&chain->allocations,
+                                                start.val, end.val)) {
+                                printk(KERN_INFO "Failed to allocate extent for "
+                                        "%lu-%lu.\n", start.val, end.val);
+                                free_swap_range(start.val, end.val);
+                                break;
+                        }
+                        gotten += added;
+                        swap_allocated += added;
+                } else
+                        break;
+        }
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "  Allocated %lu pages.", gotten);
+        return gotten;
+}
+
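+/* toi_swap_register_storage
+ *
+ * Walk the active swap devices and register each writable, non RAM-backed
+ * one with the bio layer as a potential backing store.
+ */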
+static int toi_swap_register_storage(void)
+{
+        int i, result = 0;
+
+        toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage.");
+        for (i = 0; i < MAX_SWAPFILES; i++) {
+                struct swap_info_struct *si = get_swap_info_struct(i);
+                struct toi_bdev_info *devinfo;
+                unsigned char *p;
+                unsigned char buf[256];
+                struct fs_info *fs_info;
+
+                if (!si || !(si->flags & SWP_WRITEOK) || is_ram_backed(si))
+                        continue;
+
+                devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info),
+                                GFP_ATOMIC);
+                if (!devinfo) {
+                        printk("Failed to allocate devinfo struct for swap "
+                                        "device %d.\n", i);
+                        return -ENOMEM;
+                }
+
+                devinfo->bdev = si->bdev;
+                devinfo->allocator = &toi_swapops;
+                devinfo->allocator_index = i;
+
+                fs_info = fs_info_from_block_dev(si->bdev);
+                if (fs_info && !IS_ERR(fs_info)) {
+                        memcpy(devinfo->uuid, &fs_info->uuid, 16);
+                        free_fs_info(fs_info);
+                } else
+                        result = (int) PTR_ERR(fs_info);
+
+                if (!fs_info)
+                        printk("fs_info from block dev returned %d.\n", result);
+                devinfo->dev_t = si->bdev->bd_dev;
+                devinfo->prio = si->prio;
+                devinfo->bmap_shift = 3;
+                devinfo->blocks_per_page = 1;
+
+                p = d_path(&si->swap_file->f_path, buf, sizeof(buf));
+                sprintf(devinfo->name, "swap on %s", p);
+
+                toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:"
+                                " Device %d (%lx), prio %d.", i,
+                                (unsigned long) devinfo->dev_t, devinfo->prio);
+                toi_bio_ops.register_storage(devinfo);
+        }
+
+        return 0;
+}
+
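+/* Free any swap entries allocated beyond the number actually used for the
+ * image, returning the first entry freed (0 if none were). */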
+static unsigned long toi_swap_free_unused_storage(struct toi_bdev_info *chain, unsigned long used)
+{
+    struct hibernate_extent *extentpointer = NULL;
+    unsigned long extentvalue;
+    unsigned long i = 0, first_freed = 0;
+
+    toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) {
+        i++;
+        if (i > used) {
+            swap_free((swp_entry_t) { extentvalue });
+            if (!first_freed)
+                first_freed = extentvalue;
+        }
+    }
+
+    return first_freed;
+}
+
+/*
+ * toi_swap_memory_needed
+ *
+ * Description:
+ * Returns the number of bytes of RAM needed for this
+ * code to do its work. (Used when calculating whether
+ * we have enough memory to be able to hibernate & resume.)
+ */
+static int toi_swap_memory_needed(void)
+{
+        return 1;
+}
+
+/*
+ * toi_swap_print_debug_stats
+ *
+ * Description:
+ * Append the swap allocator's status (any swapfile to be swapon'd and the
+ * swap space currently available for the image) to the debug output buffer.
+ */
+static int toi_swap_print_debug_stats(char *buffer, int size)
+{
+        int len = 0;
+
+        len = scnprintf(buffer, size, "- Swap Allocator enabled.\n");
+        if (swapfilename[0])
+                len += scnprintf(buffer+len, size-len,
+                        "  Attempting to automatically swapon: %s.\n",
+                        swapfilename);
+
+        si_swapinfo_no_compcache();
+
+        len += scnprintf(buffer+len, size-len,
+                        "  Swap available for image: %lu pages.\n",
+                        swapinfo.freeswap + swap_allocated);
+
+        return len;
+}
+
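+/* header_locations_read_sysfs
+ *
+ * Back the headerlocations sysfs entry: for each active swap device, print
+ * the resume= string the user should pass for that partition or swapfile.
+ */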
+static int header_locations_read_sysfs(const char *page, int count)
+{
+        int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
+        struct inode *swapf = NULL;
+        int zone;
+        char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
+        char *path, *output = (char *) page;
+        int path_len;
+
+        if (!page || !path_page) {
+                if (path_page)
+                        toi_free_page(10, (unsigned long) path_page);
+                return 0;
+        }
+
+        for (i = 0; i < MAX_SWAPFILES; i++) {
+                struct swap_info_struct *si =  get_swap_info_struct(i);
+
+                if (!si || !(si->flags & SWP_WRITEOK))
+                        continue;
+
+                if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
+                        haveswap = 1;
+                        if (!printedpartitionsmessage) {
+                                len += sprintf(output + len,
+                                        "For swap partitions, simply use the "
+                                        "format: resume=swap:/dev/hda1.\n");
+                                printedpartitionsmessage = 1;
+                        }
+                } else {
+                        path_len = 0;
+
+                        path = d_path(&si->swap_file->f_path, path_page,
+                                        PAGE_SIZE);
+                        path_len = snprintf(path_page, PAGE_SIZE, "%s", path);
+
+                        haveswap = 1;
+                        swapf = si->swap_file->f_mapping->host;
+                        zone = bmap(swapf, 0);
+                        if (!zone) {
+                                len += sprintf(output + len,
+                                        "Swapfile %s has been corrupted. Reuse"
+                                        " mkswap on it and try again.\n",
+                                        path_page);
+                        } else {
+                                char name_buffer[BDEVNAME_SIZE];
+                                len += sprintf(output + len,
+                                        "For swapfile `%s`,"
+                                        " use resume=swap:/dev/%s:0x%x.\n",
+                                        path_page,
+                                        bdevname(si->bdev, name_buffer),
+                                        zone << (swapf->i_blkbits - 9));
+                        }
+                }
+        }
+
+        if (!haveswap)
+                len = sprintf(output, "You need to turn on swap partitions "
+                                "before examining this file.\n");
+
+        toi_free_page(10, (unsigned long) path_page);
+        return len;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
+        SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
+                        header_locations_read_sysfs, NULL, 0, NULL),
+        SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
+                        attempt_to_parse_resume_device2),
+};
+
+static struct toi_bio_allocator_ops toi_bio_swapops = {
+        .register_storage                        = toi_swap_register_storage,
+        .storage_available                        = toi_swap_storage_available,
+        .allocate_storage                        = toi_swap_allocate_storage,
+        .bmap                                        = get_main_pool_phys_params,
+        .free_storage                                = toi_swap_free_storage,
+        .free_unused_storage                    = toi_swap_free_unused_storage,
+};
+
+static struct toi_module_ops toi_swapops = {
+        .type                                        = BIO_ALLOCATOR_MODULE,
+        .name                                        = "swap storage",
+        .directory                                = "swap",
+        .module                                        = THIS_MODULE,
+        .memory_needed                                = toi_swap_memory_needed,
+        .print_debug_info                        = toi_swap_print_debug_stats,
+        .initialise                                = toi_swap_initialise,
+        .cleanup                                = toi_swap_cleanup,
+        .bio_allocator_ops                        = &toi_bio_swapops,
+
+        .sysfs_data                = sysfs_params,
+        .num_sysfs_entries        = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_swap_load(void)
+{
+        return toi_register_module(&toi_swapops);
+}
+
+late_initcall(toi_swap_load);
diff -Nur linux-4.2/kernel/power/tuxonice_sysfs.c linux-4.2-pck/kernel/power/tuxonice_sysfs.c
--- linux-4.2/kernel/power/tuxonice_sysfs.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_sysfs.c	2015-09-08 11:20:32.473678317 -0300
@@ -0,0 +1,333 @@
+/*
+ * kernel/power/tuxonice_sysfs.c
+ *
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains support for sysfs entries for tuning TuxOnIce.
+ *
+ * We have a generic handler that deals with the most common cases, and
+ * hooks for special handlers to use.
+ */
+
+#include <linux/suspend.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_alloc.h"
+
+static int toi_sysfs_initialised;
+
+static void toi_initialise_sysfs(void);
+
+static struct toi_sysfs_data sysfs_params[];
+
+#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
+
+static void toi_main_wrapper(void)
+{
+        toi_try_hibernate();
+}
+
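+/* toi_attr_show
+ *
+ * Generic show routine for TuxOnIce sysfs entries: dispatch on the entry's
+ * data type and format the current value into the output page.
+ */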
+static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
+                              char *page)
+{
+        struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+        int len = 0;
+        int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
+
+        if (full_prep && toi_start_anything(0))
+                return -EBUSY;
+
+        if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+                toi_prepare_usm();
+
+        switch (sysfs_data->type) {
+        case TOI_SYSFS_DATA_CUSTOM:
+                len = (sysfs_data->data.special.read_sysfs) ?
+                        (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
+                        : 0;
+                break;
+        case TOI_SYSFS_DATA_BIT:
+                len = sprintf(page, "%d\n",
+                        -test_bit(sysfs_data->data.bit.bit,
+                                sysfs_data->data.bit.bit_vector));
+                break;
+        case TOI_SYSFS_DATA_INTEGER:
+                len = sprintf(page, "%d\n",
+                        *(sysfs_data->data.integer.variable));
+                break;
+        case TOI_SYSFS_DATA_LONG:
+                len = sprintf(page, "%ld\n",
+                        *(sysfs_data->data.a_long.variable));
+                break;
+        case TOI_SYSFS_DATA_UL:
+                len = sprintf(page, "%lu\n",
+                        *(sysfs_data->data.ul.variable));
+                break;
+        case TOI_SYSFS_DATA_STRING:
+                len = sprintf(page, "%s\n",
+                        sysfs_data->data.string.variable);
+                break;
+        }
+
+        if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+                toi_cleanup_usm();
+
+        if (full_prep)
+                toi_finish_anything(0);
+
+        return len;
+}
+
+#define BOUND(_variable, _type) do { \
+        if (*_variable < sysfs_data->data._type.minimum) \
+                *_variable = sysfs_data->data._type.minimum; \
+        else if (*_variable > sysfs_data->data._type.maximum) \
+                *_variable = sysfs_data->data._type.maximum; \
+} while (0)
+
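+/* toi_attr_store
+ *
+ * Generic store routine: parse the written value according to the entry's
+ * data type, clamp it to the declared bounds and run any side-effect hook.
+ */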
+static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
+                const char *my_buf, size_t count)
+{
+        int assigned_temp_buffer = 0, result = count;
+        struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+
+        if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
+                return -EBUSY;
+
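+        /* NUL-terminate the incoming data so the parsers below see a string */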
+        ((char *) my_buf)[count] = 0;
+
+        if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+                toi_prepare_usm();
+
+        switch (sysfs_data->type) {
+        case TOI_SYSFS_DATA_CUSTOM:
+                if (sysfs_data->data.special.write_sysfs)
+                        result = (sysfs_data->data.special.write_sysfs)(my_buf,
+                                        count);
+                break;
+        case TOI_SYSFS_DATA_BIT:
+                {
+                unsigned long value;
+                result = kstrtoul(my_buf, 0, &value);
+                if (result)
+                        break;
+                if (value)
+                        set_bit(sysfs_data->data.bit.bit,
+                                (sysfs_data->data.bit.bit_vector));
+                else
+                        clear_bit(sysfs_data->data.bit.bit,
+                                (sysfs_data->data.bit.bit_vector));
+                }
+                break;
+        case TOI_SYSFS_DATA_INTEGER:
+                {
+                        long temp;
+                        result = kstrtol(my_buf, 0, &temp);
+                        if (result)
+                                break;
+                        *(sysfs_data->data.integer.variable) = (int) temp;
+                        BOUND(sysfs_data->data.integer.variable, integer);
+                        break;
+                }
+        case TOI_SYSFS_DATA_LONG:
+                {
+                        long *variable =
+                                sysfs_data->data.a_long.variable;
+                        result = kstrtol(my_buf, 0, variable);
+                        if (result)
+                                break;
+                        BOUND(variable, a_long);
+                        break;
+                }
+        case TOI_SYSFS_DATA_UL:
+                {
+                        unsigned long *variable =
+                                sysfs_data->data.ul.variable;
+                        result = kstrtoul(my_buf, 0, variable);
+                        if (result)
+                                break;
+                        BOUND(variable, ul);
+                        break;
+                }
+                break;
+        case TOI_SYSFS_DATA_STRING:
+                {
+                        int copy_len = count;
+                        char *variable =
+                                sysfs_data->data.string.variable;
+
+                        if (sysfs_data->data.string.max_length &&
+                            (copy_len > sysfs_data->data.string.max_length))
+                                copy_len = sysfs_data->data.string.max_length;
+
+                        if (!variable) {
+                                variable = (char *) toi_get_zeroed_page(31,
+                                                TOI_ATOMIC_GFP);
+                                sysfs_data->data.string.variable = variable;
+                                assigned_temp_buffer = 1;
+                        }
+                        strncpy(variable, my_buf, copy_len);
+                        if (copy_len && my_buf[copy_len - 1] == '\n')
+                                variable[copy_len - 1] = 0;
+                        variable[copy_len] = 0;
+                }
+                break;
+        }
+
+        if (!result)
+                result = count;
+
+        /* Side effect routine? */
+        if (result == count && sysfs_data->write_side_effect)
+                sysfs_data->write_side_effect();
+
+        /* Free temporary buffers */
+        if (assigned_temp_buffer) {
+                toi_free_page(31,
+                        (unsigned long) sysfs_data->data.string.variable);
+                sysfs_data->data.string.variable = NULL;
+        }
+
+        if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+                toi_cleanup_usm();
+
+        toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
+
+        return result;
+}
+
+static struct sysfs_ops toi_sysfs_ops = {
+        .show        = &toi_attr_show,
+        .store        = &toi_attr_store,
+};
+
+static struct kobj_type toi_ktype = {
+        .sysfs_ops        = &toi_sysfs_ops,
+};
+
+struct kobject *tuxonice_kobj;
+
+/* Non-module sysfs entries.
+ *
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+        SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
+                SYSFS_HIBERNATING, toi_main_wrapper),
+        SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
+                SYSFS_RESUMING, toi_try_resume)
+};
+
+void remove_toi_sysdir(struct kobject *kobj)
+{
+        if (!kobj)
+                return;
+
+        kobject_put(kobj);
+}
+
+struct kobject *make_toi_sysdir(char *name)
+{
+        struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
+
+        if (!kobj) {
+                printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
+                                "dir!\n");
+                return NULL;
+        }
+
+        kobj->ktype = &toi_ktype;
+
+        return kobj;
+}
+
+/* toi_register_sysfs_file
+ *
+ * Helper for registering a new /sys/power/tuxonice entry.
+ */
+
+int toi_register_sysfs_file(
+                struct kobject *kobj,
+                struct toi_sysfs_data *toi_sysfs_data)
+{
+        int result;
+
+        if (!toi_sysfs_initialised)
+                toi_initialise_sysfs();
+
+        result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
+        if (result)
+                printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
+                        "returned %d.\n",
+                        toi_sysfs_data->attr.name, result);
+        kobj->ktype = &toi_ktype;
+
+        return result;
+}
+
+/* toi_unregister_sysfs_file
+ *
+ * Helper for removing unwanted /sys/power/tuxonice entries.
+ *
+ */
+void toi_unregister_sysfs_file(struct kobject *kobj,
+                struct toi_sysfs_data *toi_sysfs_data)
+{
+        sysfs_remove_file(kobj, &toi_sysfs_data->attr);
+}
+
+void toi_cleanup_sysfs(void)
+{
+        int i,
+            numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+        if (!toi_sysfs_initialised)
+                return;
+
+        for (i = 0; i < numfiles; i++)
+                toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+        kobject_put(tuxonice_kobj);
+        toi_sysfs_initialised = 0;
+}
+
+/* toi_initialise_sysfs
+ *
+ * Initialise the /sys/power/tuxonice directory.
+ */
+
+static void toi_initialise_sysfs(void)
+{
+        int i;
+        int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+        if (toi_sysfs_initialised)
+                return;
+
+        /* Make our TuxOnIce directory a child of /sys/power */
+        tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
+        if (!tuxonice_kobj)
+                return;
+
+        toi_sysfs_initialised = 1;
+
+        for (i = 0; i < numfiles; i++)
+                toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+}
+
+int toi_sysfs_init(void)
+{
+        toi_initialise_sysfs();
+        return 0;
+}
+
+void toi_sysfs_exit(void)
+{
+        toi_cleanup_sysfs();
+}
diff -Nur linux-4.2/kernel/power/tuxonice_sysfs.h linux-4.2-pck/kernel/power/tuxonice_sysfs.h
--- linux-4.2/kernel/power/tuxonice_sysfs.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_sysfs.h	2015-09-08 11:20:32.473678317 -0300
@@ -0,0 +1,137 @@
+/*
+ * kernel/power/tuxonice_sysfs.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/sysfs.h>
+
+struct toi_sysfs_data {
+        struct attribute attr;
+        int type;
+        int flags;
+        union {
+                struct {
+                        unsigned long *bit_vector;
+                        int bit;
+                } bit;
+                struct {
+                        int *variable;
+                        int minimum;
+                        int maximum;
+                } integer;
+                struct {
+                        long *variable;
+                        long minimum;
+                        long maximum;
+                } a_long;
+                struct {
+                        unsigned long *variable;
+                        unsigned long minimum;
+                        unsigned long maximum;
+                } ul;
+                struct {
+                        char *variable;
+                        int max_length;
+                } string;
+                struct {
+                        int (*read_sysfs) (const char *buffer, int count);
+                        int (*write_sysfs) (const char *buffer, int count);
+                        void *data;
+                } special;
+        } data;
+
+        /* Side effects routine. Used, eg, for reparsing the
+         * resume= entry when it changes */
+        void (*write_side_effect) (void);
+        struct list_head sysfs_data_list;
+};
+
+enum {
+        TOI_SYSFS_DATA_NONE = 1,
+        TOI_SYSFS_DATA_CUSTOM,
+        TOI_SYSFS_DATA_BIT,
+        TOI_SYSFS_DATA_INTEGER,
+        TOI_SYSFS_DATA_UL,
+        TOI_SYSFS_DATA_LONG,
+        TOI_SYSFS_DATA_STRING
+};
+
+#define SYSFS_WRITEONLY 0200
+#define SYSFS_READONLY 0444
+#define SYSFS_RW 0644
+
+#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_BIT, \
+        .flags = _flags, \
+        .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
+
+#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_INTEGER, \
+        .flags = _flags, \
+        .data = { .integer = { .variable = _int, .minimum = _min, \
+                        .maximum = _max } }, \
+        .write_side_effect = _wse }
+
+#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_UL, \
+        .flags = _flags, \
+        .data = { .ul = { .variable = _ul, .minimum = _min, \
+                        .maximum = _max } } }
+
+#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_LONG, \
+        .flags = _flags, \
+        .data = { .a_long = { .variable = _long, .minimum = _min, \
+                        .maximum = _max } } }
+
+#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_STRING, \
+        .flags = _flags, \
+        .data = { .string = { .variable = _string, .max_length = _max_len } }, \
+        .write_side_effect = _wse }
+
+#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
+        .attr = {.name  = _name , .mode   = _mode }, \
+        .type = TOI_SYSFS_DATA_CUSTOM, \
+        .flags = _flags, \
+        .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
+        .write_side_effect = _wse }
+
+#define SYSFS_NONE(_name, _wse) { \
+        .attr = {.name  = _name , .mode   = SYSFS_WRITEONLY }, \
+        .type = TOI_SYSFS_DATA_NONE, \
+        .write_side_effect = _wse, \
+}
+
+/* Flags */
+#define SYSFS_NEEDS_SM_FOR_READ 1
+#define SYSFS_NEEDS_SM_FOR_WRITE 2
+#define SYSFS_HIBERNATE 4
+#define SYSFS_RESUME 8
+#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
+#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_NEEDS_SM_FOR_BOTH \
+ (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
+
+int toi_register_sysfs_file(struct kobject *kobj,
+                struct toi_sysfs_data *toi_sysfs_data);
+void toi_unregister_sysfs_file(struct kobject *kobj,
+                struct toi_sysfs_data *toi_sysfs_data);
+
+extern struct kobject *tuxonice_kobj;
+
+struct kobject *make_toi_sysdir(char *name);
+void remove_toi_sysdir(struct kobject *obj);
+extern void toi_cleanup_sysfs(void);
+
+extern int toi_sysfs_init(void);
+extern void toi_sysfs_exit(void);
diff -Nur linux-4.2/kernel/power/tuxonice_ui.c linux-4.2-pck/kernel/power/tuxonice_ui.c
--- linux-4.2/kernel/power/tuxonice_ui.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_ui.c	2015-09-08 11:20:32.473678317 -0300
@@ -0,0 +1,247 @@
+/*
+ * kernel/power/tuxonice_ui.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/reboot.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_builtin.h"
+
+static char local_printf_buf[1024];        /* Same as printk - should be safe */
+struct ui_ops *toi_current_ui;
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress, either from userui or /dev/console if userui isn't
+ * available. The non-userui path is particularly for at boot-time, prior
+ * to userui being started, when we have an important warning to give to
+ * the user.
+ */
+static char toi_wait_for_keypress(int timeout)
+{
+        if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
+                return ' ';
+
+        return toi_wait_for_keypress_dev_console(timeout);
+}
+
+/* toi_early_boot_message()
+ * Description:        Handle errors early in the process of booting.
+ *                 The user may press C to continue booting, perhaps
+ *                 invalidating the image, or space to reboot.
+ *                 This works from either the serial console or normally
+ *                 attached keyboard.
+ *
+ *                 Note that we come in here from init, while the kernel is
+ *                 locked. If we want to get events from the serial console,
+ *                 we need to temporarily unlock the kernel.
+ *
+ *                 toi_early_boot_message may also be called post-boot.
+ *                 In this case, it simply printks the message and returns.
+ *
+ * Arguments:        int        Whether we are able to erase the image.
+ *                 int        default_answer. What to do when we time out. This
+ *                         will normally be continue, but the user might
+ *                         provide command line options (__setup) to override
+ *                         particular cases.
+ *                 Char *. Pointer to a string explaining why we're moaning.
+ */
+
+#define say(message, a...) printk(KERN_EMERG message, ##a)
+
+void toi_early_boot_message(int message_detail, int default_answer,
+        char *warning_reason, ...)
+{
+        unsigned long orig_state = get_toi_state(), continue_req = 0;
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+        unsigned long orig_loglevel = console_loglevel;
+        int can_ask = 1;
+#else
+        int can_ask = 0;
+#endif
+
+        va_list args;
+        int printed_len;
+
+        if (!toi_wait) {
+                set_toi_state(TOI_CONTINUE_REQ);
+                can_ask = 0;
+        }
+
+        if (warning_reason) {
+                va_start(args, warning_reason);
+                printed_len = vsnprintf(local_printf_buf,
+                                sizeof(local_printf_buf),
+                                warning_reason,
+                                args);
+                va_end(args);
+        }
+
+        if (!test_toi_state(TOI_BOOT_TIME)) {
+                printk("TuxOnIce: %s\n", local_printf_buf);
+                return;
+        }
+
+        if (!can_ask) {
+                continue_req = !!default_answer;
+                goto post_ask;
+        }
+
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+        console_loglevel = 7;
+
+        say("=== TuxOnIce ===\n\n");
+        if (warning_reason) {
+                say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
+                switch (message_detail) {
+                case 0:
+                        say("If you continue booting, note that any image WILL"
+                                "NOT BE REMOVED.\nTuxOnIce is unable to do so "
+                                "because the appropriate modules aren't\n"
+                                "loaded. You should manually remove the image "
+                                "to avoid any\npossibility of corrupting your "
+                                "filesystem(s) later.\n");
+                        break;
+                case 1:
+                        say("If you want to use the current TuxOnIce image, "
+                                "reboot and try\nagain with the same kernel "
+                                "that you hibernated from. If you want\n"
+                                "to forget that image, continue and the image "
+                                "will be erased.\n");
+                        break;
+                }
+                say("Press SPACE to reboot or C to continue booting with "
+                        "this kernel\n\n");
+                if (toi_wait > 0)
+                        say("Default action if you don't select one in %d "
+                                "seconds is: %s.\n",
+                                toi_wait,
+                                default_answer == TOI_CONTINUE_REQ ?
+                                "continue booting" : "reboot");
+        } else {
+                say("BIG FAT WARNING!!\n\n"
+                        "You have tried to resume from this image before.\n"
+                        "If it failed once, it may well fail again.\n"
+                        "Would you like to remove the image and boot "
+                        "normally?\nThis will be equivalent to entering "
+                        "noresume on the\nkernel command line.\n\n"
+                        "Press SPACE to remove the image or C to continue "
+                        "resuming.\n\n");
+                if (toi_wait > 0)
+                        say("Default action if you don't select one in %d "
+                                "seconds is: %s.\n", toi_wait,
+                                !!default_answer ?
+                                "continue resuming" : "remove the image");
+        }
+        console_loglevel = orig_loglevel;
+
+        set_toi_state(TOI_SANITY_CHECK_PROMPT);
+        clear_toi_state(TOI_CONTINUE_REQ);
+
+        if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
+                continue_req = !!default_answer;
+        else
+                continue_req = test_toi_state(TOI_CONTINUE_REQ);
+
+#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
+
+post_ask:
+        if ((warning_reason) && (!continue_req))
+                kernel_restart(NULL);
+
+        restore_toi_state(orig_state);
+        if (continue_req)
+                set_toi_state(TOI_CONTINUE_REQ);
+}
+
+#undef say
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+        SYSFS_INT("default_console_level", SYSFS_RW,
+                        &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
+        SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
+                        1 << 30, 0),
+        SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
+                        0)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+        .type                                = MISC_HIDDEN_MODULE,
+        .name                                = "printk ui",
+        .directory                        = "user_interface",
+        .module                                = THIS_MODULE,
+        .sysfs_data                        = sysfs_params,
+        .num_sysfs_entries                = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+int toi_register_ui_ops(struct ui_ops *this_ui)
+{
+        if (toi_current_ui) {
+                printk(KERN_INFO "Only one TuxOnIce user interface module can "
+                                "be loaded at a time.");
+                return -EBUSY;
+        }
+
+        toi_current_ui = this_ui;
+
+        return 0;
+}
+
+void toi_remove_ui_ops(struct ui_ops *this_ui)
+{
+        if (toi_current_ui != this_ui)
+                return;
+
+        toi_current_ui = NULL;
+}
+
+/* toi_ui_init
+ * Description: Boot time initialisation for the printk-based user interface.
+ */
+
+int toi_ui_init(void)
+{
+        return toi_register_module(&userui_ops);
+}
+
+void toi_ui_exit(void)
+{
+        toi_unregister_module(&userui_ops);
+}
diff -Nur linux-4.2/kernel/power/tuxonice_ui.h linux-4.2-pck/kernel/power/tuxonice_ui.h
--- linux-4.2/kernel/power/tuxonice_ui.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_ui.h	2015-09-08 11:20:32.473678317 -0300
@@ -0,0 +1,97 @@
+/*
+ * kernel/power/tuxonice_ui.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ */
+
+enum {
+        DONT_CLEAR_BAR,
+        CLEAR_BAR
+};
+
+enum {
+        /* Userspace -> Kernel */
+        USERUI_MSG_ABORT = 0x11,
+        USERUI_MSG_SET_STATE = 0x12,
+        USERUI_MSG_GET_STATE = 0x13,
+        USERUI_MSG_GET_DEBUG_STATE = 0x14,
+        USERUI_MSG_SET_DEBUG_STATE = 0x15,
+        USERUI_MSG_SPACE = 0x18,
+        USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
+        USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
+        USERUI_MSG_GET_LOGLEVEL = 0x1C,
+        USERUI_MSG_SET_LOGLEVEL = 0x1D,
+        USERUI_MSG_PRINTK = 0x1E,
+
+        /* Kernel -> Userspace */
+        USERUI_MSG_MESSAGE = 0x21,
+        USERUI_MSG_PROGRESS = 0x22,
+        USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
+
+        USERUI_MSG_MAX,
+};
+
+struct userui_msg_params {
+        u32 a, b, c, d;
+        char text[255];
+};
+
+struct ui_ops {
+        char (*wait_for_key) (int timeout);
+        u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
+        void (*prepare_status) (int clearbar, const char *fmt, ...);
+        void (*cond_pause) (int pause, char *message);
+        void (*abort)(int result_code, const char *fmt, ...);
+        void (*prepare)(void);
+        void (*cleanup)(void);
+        void (*message)(u32 section, u32 level, u32 normally_logged,
+                        const char *fmt, ...);
+};
+
+extern struct ui_ops *toi_current_ui;
+
+#define toi_update_status(val, max, fmt, args...) \
+ (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
+        max)
+
+#define toi_prepare_console(void) \
+        do { if (toi_current_ui) \
+                (toi_current_ui->prepare)(); \
+        } while (0)
+
+#define toi_cleanup_console(void) \
+        do { if (toi_current_ui) \
+                (toi_current_ui->cleanup)(); \
+        } while (0)
+
+#define abort_hibernate(result, fmt, args...) \
+        do { if (toi_current_ui) \
+                (toi_current_ui->abort)(result, fmt, ##args); \
+             else { \
+                set_abort_result(result); \
+             } \
+        } while (0)
+
+#define toi_cond_pause(pause, message) \
+        do { if (toi_current_ui) \
+                (toi_current_ui->cond_pause)(pause, message); \
+        } while (0)
+
+#define toi_prepare_status(clear, fmt, args...) \
+        do { if (toi_current_ui) \
+                (toi_current_ui->prepare_status)(clear, fmt, ##args); \
+             else \
+                printk(KERN_INFO fmt "%s", ##args, "\n"); \
+        } while (0)
+
+#define toi_message(sn, lev, log, fmt, a...) \
+do { \
+        if (toi_current_ui && (!sn || test_debug_state(sn))) \
+                toi_current_ui->message(sn, lev, log, fmt, ##a); \
+} while (0)
+
+__exit void toi_ui_cleanup(void);
+extern int toi_ui_init(void);
+extern void toi_ui_exit(void);
+extern int toi_register_ui_ops(struct ui_ops *this_ui);
+extern void toi_remove_ui_ops(struct ui_ops *this_ui);
diff -Nur linux-4.2/kernel/power/tuxonice_userui.c linux-4.2-pck/kernel/power/tuxonice_userui.c
--- linux-4.2/kernel/power/tuxonice_userui.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/kernel/power/tuxonice_userui.c	2015-09-08 11:20:32.473678317 -0300
@@ -0,0 +1,658 @@
+/*
+ * kernel/power/user_ui.c
+ *
+ * Copyright (C) 2005-2007 Bernard Blackham
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/reboot.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/vt.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+
+static char local_printf_buf[1024];        /* Same as printk - should be safe */
+
+static struct user_helper_data ui_helper_data;
+static struct toi_module_ops userui_ops;
+static int orig_kmsg;
+
+static char lastheader[512];
+static int lastheader_message_len;
+static int ui_helper_changed; /* Used at resume-time so don't overwrite value
+                                set from initrd/ramfs. */
+
+/* Number of distinct progress amounts that userspace can display */
+static int progress_granularity = 30;
+
+static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
+static int userui_wait_should_wake;
+
+#define toi_stop_waiting_for_userui_key() \
+{ \
+        userui_wait_should_wake = true; \
+        wake_up_interruptible(&userui_wait_for_key); \
+}
+
+/**
+ * ui_nl_set_state - Update toi_action based on a message from userui.
+ *
+ * @n: The bit (1 << bit) to set.
+ */
+static void ui_nl_set_state(int n)
+{
+        /* Only let them change certain settings */
+        static const u32 toi_action_mask =
+                (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
+                (1 << TOI_LOGALL) |
+                (1 << TOI_SINGLESTEP) |
+                (1 << TOI_PAUSE_NEAR_PAGESET_END);
+        static unsigned long new_action;
+
+        new_action = (toi_bkd.toi_action & (~toi_action_mask)) |
+                (n & toi_action_mask);
+
+        printk(KERN_DEBUG "n is %x. Action flags being changed from %lx "
+                        "to %lx.", n, toi_bkd.toi_action, new_action);
+        toi_bkd.toi_action = new_action;
+
+        if (!test_action_state(TOI_PAUSE) &&
+                        !test_action_state(TOI_SINGLESTEP))
+                toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_post_atomic_restore - Tell userui that atomic restore just happened.
+ *
+ * Tell userui that atomic restore just occurred, so that it can do things like
+ * redrawing the screen, re-getting settings and so on.
+ */
+static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+        toi_send_netlink_message(&ui_helper_data,
+                        USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
+}
+
+/**
+ * userui_storage_needed - Report how much memory in image header is needed.
+ */
+static int userui_storage_needed(void)
+{
+        return sizeof(ui_helper_data.program) + 1 + sizeof(int);
+}
+
+/**
+ * userui_save_config_info - Fill buffer with config info for image header.
+ *
+ * @buf: Buffer into which to put the config info we want to save.
+ */
+static int userui_save_config_info(char *buf)
+{
+        *((int *) buf) = progress_granularity;
+        memcpy(buf + sizeof(int), ui_helper_data.program,
+                        sizeof(ui_helper_data.program));
+        return sizeof(ui_helper_data.program) + sizeof(int) + 1;
+}
+
+/**
+ * userui_load_config_info - Restore config info from buffer.
+ *
+ * @buf: Buffer containing header info loaded.
+ * @size: Size of data loaded for this module.
+ */
+static void userui_load_config_info(char *buf, int size)
+{
+        progress_granularity = *((int *) buf);
+        size -= sizeof(int);
+
+        /* Don't load the saved path if one has already been set */
+        if (ui_helper_changed)
+                return;
+
+        if (size > sizeof(ui_helper_data.program))
+                size = sizeof(ui_helper_data.program);
+
+        memcpy(ui_helper_data.program, buf + sizeof(int), size);
+        ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
+}
+
+/**
+ * set_ui_program_set: Record that userui program was changed.
+ *
+ * Side effect routine for when the userui program is set. In an initrd or
+ * ramfs, the user may set a location for the userui program. If this happens,
+ * we don't want to reload the value that was saved in the image header. This
+ * routine allows us to flag that we shouldn't restore the program name from
+ * the image header.
+ */
+static void set_ui_program_set(void)
+{
+        ui_helper_changed = 1;
+}
+
+/**
+ * userui_memory_needed - Tell core how much memory to reserve for us.
+ */
+static int userui_memory_needed(void)
+{
+        /* ball park figure of 128 pages */
+        return 128 * PAGE_SIZE;
+}
+
+/**
+ * userui_update_status - Update the progress bar and (if on) in-bar message.
+ *
+ * @value: Current progress percentage numerator.
+ * @maximum: Current progress percentage denominator.
+ * @fmt: Message to be displayed in the middle of the progress bar.
+ *
+ * Note that a NULL message does not mean that any previous message is erased!
+ * For that, you need toi_prepare_status with clearbar on.
+ *
+ * Returns an unsigned long, being the next numerator (as determined by the
+ * maximum and progress granularity) where status needs to be updated.
+ * This is to reduce unnecessary calls to update_status.
+ */
+static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
+{
+        static u32 last_step = 9999;
+        struct userui_msg_params msg;
+        u32 this_step, next_update;
+        int bitshift;
+
+        if (ui_helper_data.pid == -1)
+                return 0;
+
+        if ((!maximum) || (!progress_granularity))
+                return maximum;
+
+        if (value < 0)
+                value = 0;
+
+        if (value > maximum)
+                value = maximum;
+
+        /* Try to avoid math problems - we can't do 64 bit math here
+         * (and shouldn't need it - anyone got screen resolution
+         * of 65536 pixels or more?) */
+        bitshift = fls(maximum) - 16;
+        if (bitshift > 0) {
+                u32 temp_maximum = maximum >> bitshift;
+                u32 temp_value = value >> bitshift;
+                this_step = (u32)
+                        (temp_value * progress_granularity / temp_maximum);
+                next_update = (((this_step + 1) * temp_maximum /
+                                        progress_granularity) + 1) << bitshift;
+        } else {
+                this_step = (u32) (value * progress_granularity / maximum);
+                next_update = ((this_step + 1) * maximum /
+                                progress_granularity) + 1;
+        }
+
+        if (this_step == last_step)
+                return next_update;
+
+        memset(&msg, 0, sizeof(msg));
+
+        msg.a = this_step;
+        msg.b = progress_granularity;
+
+        if (fmt) {
+                va_list args;
+                va_start(args, fmt);
+                vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+                va_end(args);
+                msg.text[sizeof(msg.text)-1] = '\0';
+        }
+
+        toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
+                        &msg, sizeof(msg));
+        last_step = this_step;
+
+        return next_update;
+}
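
A quick worked example of the return value described above (the numbers are
hypothetical, not this module's defaults): with maximum = 1000 and
progress_granularity = 50, a call with value = 137 takes the unshifted branch,
since fls(1000) - 16 is negative.

	/* Illustration only, not part of the patch. */
	u32 this_step   = 137 * 50 / 1000;             /* = 6   */
	u32 next_update = (6 + 1) * 1000 / 50 + 1;     /* = 141 */
	/*
	 * The caller does not need to call update_status again until the
	 * numerator reaches 141, i.e. until the bar would advance to step 7.
	 */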
+
+/**
+ * userui_message - Display a message without necessarily logging it.
+ *
+ * @section: Type of message. Messages can be filtered by type.
+ * @level: Degree of importance of the message. Lower values = higher priority.
+ * @normally_logged: Whether logged even if log_everything is off.
+ * @fmt: Message (and parameters).
+ *
+ * This function is intended to do the same job as printk, but without normally
+ * logging what is printed. The point is to be able to get debugging info on
+ * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
+ *
+ * It may be called from an interrupt context - can't sleep!
+ */
+static void userui_message(u32 section, u32 level, u32 normally_logged,
+                const char *fmt, ...)
+{
+        struct userui_msg_params msg;
+
+        if ((level) && (level > console_loglevel))
+                return;
+
+        memset(&msg, 0, sizeof(msg));
+
+        msg.a = section;
+        msg.b = level;
+        msg.c = normally_logged;
+
+        if (fmt) {
+                va_list args;
+                va_start(args, fmt);
+                vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+                va_end(args);
+                msg.text[sizeof(msg.text)-1] = '\0';
+        }
+
+        if (test_action_state(TOI_LOGALL))
+                printk(KERN_INFO "%s\n", msg.text);
+
+        toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
+                        &msg, sizeof(msg));
+}
+
+/**
+ * wait_for_key_via_userui - Wait for userui to receive a keypress.
+ */
+static void wait_for_key_via_userui(void)
+{
+        DECLARE_WAITQUEUE(wait, current);
+
+        add_wait_queue(&userui_wait_for_key, &wait);
+        set_current_state(TASK_INTERRUPTIBLE);
+
+        wait_event_interruptible(userui_wait_for_key, userui_wait_should_wake);
+        userui_wait_should_wake = false;
+
+        set_current_state(TASK_RUNNING);
+        remove_wait_queue(&userui_wait_for_key, &wait);
+}
+
+/**
+ * userui_prepare_status - Display high level messages.
+ *
+ * @clearbar: Whether to clear the progress bar.
+ * @fmt...: New message for the title.
+ *
+ * Prepare the 'nice display', drawing the header and version, along with the
+ * current action and perhaps also resetting the progress bar.
+ */
+static void userui_prepare_status(int clearbar, const char *fmt, ...)
+{
+        va_list args;
+
+        if (fmt) {
+                va_start(args, fmt);
+                lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
+                va_end(args);
+        }
+
+        if (clearbar)
+                toi_update_status(0, 1, NULL);
+
+        if (ui_helper_data.pid == -1)
+                printk(KERN_EMERG "%s\n", lastheader);
+        else
+                toi_message(0, TOI_STATUS, 1, lastheader, NULL);
+}
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress from userui.
+ *
+ * FIXME: Implement timeout?
+ */
+static char userui_wait_for_keypress(int timeout)
+{
+        char key = '\0';
+
+        if (ui_helper_data.pid != -1) {
+                wait_for_key_via_userui();
+                key = ' ';
+        }
+
+        return key;
+}
+
+/**
+ * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
+ *
+ * @result_code: Reason why we're aborting (1 << bit).
+ * @fmt: Message to display if telling the user what's going on.
+ *
+ * Abort a cycle. If this wasn't at the user's request (and we're displaying
+ * output), tell the user why and wait for them to acknowledge the message.
+ */
+static void userui_abort_hibernate(int result_code, const char *fmt, ...)
+{
+        va_list args;
+        int printed_len = 0;
+
+        set_result_state(result_code);
+
+        if (test_result_state(TOI_ABORTED))
+                return;
+
+        set_result_state(TOI_ABORTED);
+
+        if (test_result_state(TOI_ABORT_REQUESTED))
+                return;
+
+        va_start(args, fmt);
+        printed_len = vsnprintf(local_printf_buf,  sizeof(local_printf_buf),
+                        fmt, args);
+        va_end(args);
+        if (ui_helper_data.pid != -1)
+                printed_len = sprintf(local_printf_buf + printed_len,
+                                        " (Press SPACE to continue)");
+
+        toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
+
+        if (ui_helper_data.pid != -1)
+                userui_wait_for_keypress(0);
+}
+
+/**
+ * request_abort_hibernate - Abort hibernating or resuming at user request.
+ *
+ * Handle the user requesting the cancellation of a hibernation or resume by
+ * pressing escape.
+ */
+static void request_abort_hibernate(void)
+{
+        if (test_result_state(TOI_ABORT_REQUESTED) ||
+           !test_action_state(TOI_CAN_CANCEL))
+                return;
+
+        if (test_toi_state(TOI_NOW_RESUMING)) {
+                toi_prepare_status(CLEAR_BAR, "Escape pressed. "
+                                        "Powering down again.");
+                set_toi_state(TOI_STOP_RESUME);
+                while (!test_toi_state(TOI_IO_STOPPED))
+                        schedule();
+                if (toiActiveAllocator->mark_resume_attempted)
+                        toiActiveAllocator->mark_resume_attempted(0);
+                toi_power_down();
+        }
+
+        toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :"
+                                        " ABORTING HIBERNATION ---");
+        set_abort_result(TOI_ABORT_REQUESTED);
+        toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_user_rcv_msg - Receive a netlink message from userui.
+ *
+ * @skb: skb received.
+ * @nlh: Netlink header received.
+ */
+static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+        int type;
+        int *data;
+
+        type = nlh->nlmsg_type;
+
+        /* A control message: ignore them */
+        if (type < NETLINK_MSG_BASE)
+                return 0;
+
+        /* Unknown message: reply with EINVAL */
+        if (type >= USERUI_MSG_MAX)
+                return -EINVAL;
+
+        /* All operations require privileges, even GET */
+        if (!capable(CAP_NET_ADMIN))
+                return -EPERM;
+
+        /* Only allow one task to receive NOFREEZE privileges */
+        if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
+                printk(KERN_INFO "Got NOFREEZE_ME request when "
+                        "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
+                return -EBUSY;
+        }
+
+        data = (int *) NLMSG_DATA(nlh);
+
+        switch (type) {
+        case USERUI_MSG_ABORT:
+                request_abort_hibernate();
+                return 0;
+        case USERUI_MSG_GET_STATE:
+                toi_send_netlink_message(&ui_helper_data,
+                                USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
+                                sizeof(toi_bkd.toi_action));
+                return 0;
+        case USERUI_MSG_GET_DEBUG_STATE:
+                toi_send_netlink_message(&ui_helper_data,
+                                USERUI_MSG_GET_DEBUG_STATE,
+                                &toi_bkd.toi_debug_state,
+                                sizeof(toi_bkd.toi_debug_state));
+                return 0;
+        case USERUI_MSG_SET_STATE:
+                if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+                        return -EINVAL;
+                ui_nl_set_state(*data);
+                return 0;
+        case USERUI_MSG_SET_DEBUG_STATE:
+                if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+                        return -EINVAL;
+                toi_bkd.toi_debug_state = (*data);
+                return 0;
+        case USERUI_MSG_SPACE:
+                toi_stop_waiting_for_userui_key();
+                return 0;
+        case USERUI_MSG_GET_POWERDOWN_METHOD:
+                toi_send_netlink_message(&ui_helper_data,
+                                USERUI_MSG_GET_POWERDOWN_METHOD,
+                                &toi_poweroff_method,
+                                sizeof(toi_poweroff_method));
+                return 0;
+        case USERUI_MSG_SET_POWERDOWN_METHOD:
+                if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char)))
+                        return -EINVAL;
+                toi_poweroff_method = (unsigned long)(*data);
+                return 0;
+        case USERUI_MSG_GET_LOGLEVEL:
+                toi_send_netlink_message(&ui_helper_data,
+                                USERUI_MSG_GET_LOGLEVEL,
+                                &toi_bkd.toi_default_console_level,
+                                sizeof(toi_bkd.toi_default_console_level));
+                return 0;
+        case USERUI_MSG_SET_LOGLEVEL:
+                if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+                        return -EINVAL;
+                toi_bkd.toi_default_console_level = (*data);
+                return 0;
+        case USERUI_MSG_PRINTK:
+                printk(KERN_INFO "%s", (char *) data);
+                return 0;
+        }
+
+        /* Unhandled here */
+        return 1;
+}
+
+/**
+ * userui_cond_pause - Possibly pause at user request.
+ *
+ * @pause: Whether to pause or just display the message.
+ * @message: Message to display at the start of pausing.
+ *
+ * Potentially pause and wait for the user to tell us to continue. We normally
+ * only pause when @pause is set. While paused, the user can do things like
+ * changing the loglevel, toggling the display of debugging sections, and
+ * so on.
+ */
+static void userui_cond_pause(int pause, char *message)
+{
+        int displayed_message = 0, last_key = 0;
+
+        while (last_key != 32 &&
+                ui_helper_data.pid != -1 &&
+                ((test_action_state(TOI_PAUSE) && pause) ||
+                 (test_action_state(TOI_SINGLESTEP)))) {
+                if (!displayed_message) {
+                        toi_prepare_status(DONT_CLEAR_BAR,
+                           "%s Press SPACE to continue.%s",
+                           message ? message : "",
+                           (test_action_state(TOI_SINGLESTEP)) ?
+                           " Single step on." : "");
+                        displayed_message = 1;
+                }
+                last_key = userui_wait_for_keypress(0);
+        }
+        schedule();
+}
+
+/**
+ * userui_prepare_console - Prepare the console for use.
+ *
+ * Prepare a console for use, saving current kmsg settings and attempting to
+ * start userui. Console loglevel changes are handled by userui.
+ */
+static void userui_prepare_console(void)
+{
+        orig_kmsg = vt_kmsg_redirect(fg_console + 1);
+
+        ui_helper_data.pid = -1;
+
+        if (!userui_ops.enabled) {
+                printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
+                return;
+        }
+
+        if (*ui_helper_data.program)
+                toi_netlink_setup(&ui_helper_data);
+        else
+                printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
+}
+
+/**
+ * userui_cleanup_console - Cleanup after a cycle.
+ *
+ * Tell userui to cleanup, and restore kmsg_redirect to its original value.
+ */
+
+static void userui_cleanup_console(void)
+{
+        if (ui_helper_data.pid > -1)
+                toi_netlink_close(&ui_helper_data);
+
+        vt_kmsg_redirect(orig_kmsg);
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+        SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_CAN_CANCEL, 0),
+        SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
+                        TOI_PAUSE, 0),
+        SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
+        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+                        2048, 0, NULL),
+        SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
+                        set_ui_program_set),
+        SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+        .type                                = MISC_MODULE,
+        .name                                = "userui",
+        .shared_directory                = "user_interface",
+        .module                                = THIS_MODULE,
+        .storage_needed                        = userui_storage_needed,
+        .save_config_info                = userui_save_config_info,
+        .load_config_info                = userui_load_config_info,
+        .memory_needed                        = userui_memory_needed,
+        .post_atomic_restore                = userui_post_atomic_restore,
+        .sysfs_data                        = sysfs_params,
+        .num_sysfs_entries                = sizeof(sysfs_params) /
+                sizeof(struct toi_sysfs_data),
+};
+
+static struct ui_ops my_ui_ops = {
+        .update_status                        = userui_update_status,
+        .message                        = userui_message,
+        .prepare_status                        = userui_prepare_status,
+        .abort                                = userui_abort_hibernate,
+        .cond_pause                        = userui_cond_pause,
+        .prepare                        = userui_prepare_console,
+        .cleanup                        = userui_cleanup_console,
+        .wait_for_key                        = userui_wait_for_keypress,
+};
+
+/**
+ * toi_user_ui_init - Boot time initialisation for user interface.
+ *
+ * Invoked from the core init routine.
+ */
+static __init int toi_user_ui_init(void)
+{
+        int result;
+
+        ui_helper_data.nl = NULL;
+        strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
+        ui_helper_data.pid = -1;
+        ui_helper_data.skb_size = sizeof(struct userui_msg_params);
+        ui_helper_data.pool_limit = 6;
+        ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
+        ui_helper_data.name = "userspace ui";
+        ui_helper_data.rcv_msg = userui_user_rcv_msg;
+        ui_helper_data.interface_version = 8;
+        ui_helper_data.must_init = 0;
+        ui_helper_data.not_ready = userui_cleanup_console;
+        init_completion(&ui_helper_data.wait_for_process);
+        result = toi_register_module(&userui_ops);
+        if (!result) {
+          result = toi_register_ui_ops(&my_ui_ops);
+          if (result)
+            toi_unregister_module(&userui_ops);
+        }
+
+        return result;
+}
+
+late_initcall(toi_user_ui_init);
diff -Nur linux-4.2/kernel/printk/printk.c linux-4.2-pck/kernel/printk/printk.c
--- linux-4.2/kernel/printk/printk.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/printk/printk.c	2015-09-08 11:20:32.500343682 -0300
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/syscalls.h>
+#include <linux/suspend.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
 #include <linux/ratelimit.h>
@@ -280,6 +281,20 @@
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+#ifdef CONFIG_TOI_INCREMENTAL
+void toi_set_logbuf_untracked(void)
+{
+    int i;
+    struct page *log_buf_start_page = virt_to_page(__log_buf);
+
+    printk("Not protecting kernel printk log buffer (%p-%p).\n",
+            __log_buf, __log_buf + __LOG_BUF_LEN);
+
+    for (i = 0; i < (1 << (CONFIG_LOG_BUF_SHIFT - PAGE_SHIFT)); i++)
+        SetPageTOI_Untracked(log_buf_start_page + i);
+}
+#endif
+
 /* Return log buffer address */
 char *log_buf_addr_get(void)
 {
@@ -1450,9 +1465,9 @@
 		    !(con->flags & CON_ANYTIME))
 			continue;
 		if (con->flags & CON_EXTENDED)
-			con->write(con, ext_text, ext_len);
+			con->write(con, ext_text, ext_len, level);
 		else
-			con->write(con, text, len);
+			con->write(con, text, len, level);
 	}
 }
 
@@ -1973,7 +1988,7 @@
 	n = vscnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
-	early_console->write(early_console, buf, n);
+	early_console->write(early_console, buf, n, 0);
 }
 #endif
 
diff -Nur linux-4.2/kernel/sched/fair.c linux-4.2-pck/kernel/sched/fair.c
--- linux-4.2/kernel/sched/fair.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/sched/fair.c	2015-09-08 00:48:22.241696581 -0300
@@ -47,8 +47,13 @@
  * (to see the precise effective timeslice length of your workload,
  *  run vmstat and monitor the context-switches (cs) field)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_latency = 3000000ULL;
+unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
 
 /*
  * The initial- and re-scaling of tunables is configurable
@@ -66,13 +71,22 @@
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
 unsigned int sysctl_sched_min_granularity = 750000ULL;
 unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+static unsigned int sched_nr_latency = 10;
+#else
 static unsigned int sched_nr_latency = 8;
+#endif
 
 /*
  * After fork, child runs first. If set to 0 (default) then
@@ -88,10 +102,17 @@
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 250000UL;
+#else
 unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
 
 /*
  * The exponential sliding  window over which load is averaged for shares
@@ -111,8 +132,12 @@
  *
  * default: 5 msec, units: microseconds
   */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
+#endif
 
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
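
A rough sketch of what the halved PCK_INTERACTIVE values above work out to
once the boot-time scaling described in the surrounding comments is applied
(this assumes the default logarithmic tunable scaling and a hypothetical
4-CPU machine):

	/* Illustration only; mirrors the "0.75 msec * (1 + ilog(ncpus))" comment. */
	unsigned int ncpus  = 4;                /* example machine */
	unsigned int factor = 1 + ilog2(ncpus); /* = 3 */
	/* PCK_INTERACTIVE: 300000 ns * 3 = 0.9 ms granularity,  3 ms * 3 =  9 ms latency */
	/* stock defaults:  750000 ns * 3 = 2.25 ms granularity, 6 ms * 3 = 18 ms latency */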
diff -Nur linux-4.2/kernel/smpboot.c linux-4.2-pck/kernel/smpboot.c
--- linux-4.2/kernel/smpboot.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/smpboot.c	2015-09-08 11:20:32.507010024 -0300
@@ -173,7 +173,7 @@
 	if (tsk)
 		return 0;
 
-	td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
+	td = kzalloc_node(sizeof(*td), GFP_KERNEL | ___GFP_TOI_NOTRACK, cpu_to_node(cpu));
 	if (!td)
 		return -ENOMEM;
 	td->cpu = cpu;
diff -Nur linux-4.2/kernel/workqueue.c linux-4.2-pck/kernel/workqueue.c
--- linux-4.2/kernel/workqueue.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/kernel/workqueue.c	2015-09-08 00:48:22.241696581 -0300
@@ -2614,7 +2614,7 @@
 out_unlock:
 	mutex_unlock(&wq->mutex);
 }
-EXPORT_SYMBOL_GPL(flush_workqueue);
+EXPORT_SYMBOL(flush_workqueue);
 
 /**
  * drain_workqueue - drain a workqueue
diff -Nur linux-4.2/lib/Makefile linux-4.2-pck/lib/Makefile
--- linux-4.2/lib/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/lib/Makefile	2015-09-08 00:51:03.460668141 -0300
@@ -8,7 +8,7 @@
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
+	 rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o md5.o irq_regs.o argv_split.o \
 	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
diff -Nur linux-4.2/lib/sradix-tree.c linux-4.2-pck/lib/sradix-tree.c
--- linux-4.2/lib/sradix-tree.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/lib/sradix-tree.c	2015-09-08 00:51:03.460668141 -0300
@@ -0,0 +1,476 @@
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/gcd.h>
+#include <linux/sradix-tree.h>
+
+static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
+{
+	return node->fulls == root->stores_size || 
+		(node->height == 1 && node->count == root->stores_size);
+}
+
+/*
+ *	Extend a sradix tree so it can store key @index.
+ */
+static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
+{
+	struct sradix_tree_node *node;
+	unsigned int height;
+
+	if (unlikely(root->rnode == NULL)) {
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		node->height = 1;
+		root->rnode = node;
+		root->height = 1;
+	}
+
+	/* Figure out what the height should be.  */
+	height = root->height;
+	index >>= root->shift * height;
+
+	while (index) {
+		index >>= root->shift;
+		height++;
+	}
+
+	while (height > root->height) {
+		unsigned int newheight;
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		/* Increase the height.  */
+		node->stores[0] = root->rnode;
+		root->rnode->parent = node;
+		if (root->extend)
+			root->extend(node, root->rnode);
+
+		newheight = root->height + 1;
+		node->height = newheight;
+		node->count = 1;
+		if (sradix_node_full(root, root->rnode))
+			node->fulls = 1;
+
+		root->rnode = node;
+		root->height = newheight;
+	}
+
+	return 0;
+}
+
+/*
+ * Search for the next item from the current node that is not NULL
+ * and satisfies the given iter() callback.
+ */
+void *sradix_tree_next(struct sradix_tree_root *root,
+		       struct sradix_tree_node *node, unsigned long index,
+		       int (*iter)(void *item, unsigned long height))
+{
+	unsigned long offset;
+	void *item;
+
+	if (unlikely(node == NULL)) {
+		node = root->rnode;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+
+		if (node->height == 1)
+			return item;
+		else
+			goto go_down;
+	}
+
+	while (node) {
+		offset = (index & root->mask) + 1;
+		for (; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (offset < root->stores_size)
+			break;
+
+		node = node->parent;
+		index >>= root->shift;
+	}
+
+	if (!node)
+		return NULL;
+
+	while (node->height > 1) {
+go_down:
+		node = item;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+	}
+
+	BUG_ON(offset > root->stores_size);
+
+	return item;
+}
+
+/*
+ * Blindly insert the items into the tree. Typically, we reuse the
+ * first empty store slots.
+ */
+int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
+{
+	unsigned long index;
+	unsigned int height;
+	struct sradix_tree_node *node, *tmp = NULL;
+	int offset, offset_saved;
+	void **store = NULL;
+	int error, i, j, shift;
+
+go_on:
+	index = root->min;
+
+	if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
+		node = root->enter_node;
+		BUG_ON((index >> (root->shift * root->height)));
+	} else {
+		node = root->rnode;
+		if (node == NULL || (index >> (root->shift * root->height))
+		    || sradix_node_full(root, node)) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return error;
+
+			node = root->rnode;
+		}
+	}
+
+
+	height = node->height;
+	shift = (height - 1) * root->shift;
+	offset = (index >> shift) & root->mask;
+	while (shift > 0) {
+		offset_saved = offset;
+		for (; offset < root->stores_size; offset++) {
+			store = &node->stores[offset];
+			tmp = *store;
+
+			if (!tmp || !sradix_node_full(root, tmp))
+				break;
+		}
+		BUG_ON(offset >= root->stores_size);
+
+		if (offset != offset_saved) {
+			index += (offset - offset_saved) << shift;
+			index &= ~((1UL << shift) - 1);
+		}
+
+		if (!tmp) {
+			if (!(tmp = root->alloc()))
+				return -ENOMEM;
+
+			tmp->height = shift / root->shift;
+			*store = tmp;
+			tmp->parent = node;
+			node->count++;
+//			if (root->extend)
+//				root->extend(node, tmp);
+		}
+
+		node = tmp;
+		shift -= root->shift;
+		offset = (index >> shift) & root->mask;
+	}
+
+	BUG_ON(node->height != 1);
+
+
+	store = &node->stores[offset];
+	for (i = 0, j = 0;
+	      j < root->stores_size - node->count && 
+	      i < root->stores_size - offset && j < num; i++) {
+		if (!store[i]) {
+			store[i] = item[j];
+			if (root->assign)
+				root->assign(node, index + i, item[j]);
+			j++;
+		}
+	}
+
+	node->count += j;
+	root->num += j;
+	num -= j;
+
+	while (sradix_node_full(root, node)) {
+		node = node->parent;
+		if (!node)
+			break;
+
+		node->fulls++;
+	}
+
+	if (unlikely(!node)) {
+		/* All nodes are full */
+		root->min = 1 << (root->height * root->shift);
+		root->enter_node = NULL;
+	} else {
+		root->min = index + i - 1;
+		root->min |= (1UL << (node->height - 1)) - 1;
+		root->min++;
+		root->enter_node = node;
+	}
+
+	if (num) {
+		item += j;
+		goto go_on;
+	}
+
+	return 0;
+}
+
+
+/**
+ *	sradix_tree_shrink - shrink the height of a sradix tree to the minimum
+ *	@root: sradix tree root
+ */
+static inline void sradix_tree_shrink(struct sradix_tree_root *root)
+{
+	/* try to shrink tree height */
+	while (root->height > 1) {
+		struct sradix_tree_node *to_free = root->rnode;
+
+		/*
+		 * If the candidate node has more than one child, or its only
+		 * child is not at the leftmost store slot, we cannot shrink.
+		 */
+		if (to_free->count != 1 || !to_free->stores[0])
+			break;
+
+		root->rnode = to_free->stores[0];
+		root->rnode->parent = NULL;
+		root->height--;
+		if (unlikely(root->enter_node == to_free)) {
+			root->enter_node = NULL;
+		}
+		root->free(to_free);
+	}
+}
+
+/*
+ * Delete the item at the given leaf node and index.
+ */
+void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, 
+				  struct sradix_tree_node *node, unsigned long index)
+{
+	unsigned int offset;
+	struct sradix_tree_node *start, *end;
+
+	BUG_ON(node->height != 1);
+
+	start = node;
+	while (node && !(--node->count))
+		node = node->parent;
+
+	end = node;
+	if (!node) {
+		root->rnode = NULL;
+		root->height = 0;
+		root->min = 0;
+		root->num = 0;
+		root->enter_node = NULL;
+	} else {
+		offset = (index >> (root->shift * (node->height - 1))) & root->mask;
+		if (root->rm)
+			root->rm(node, offset);
+		node->stores[offset] = NULL;
+		root->num--;
+		if (root->min > index) {
+			root->min = index;
+			root->enter_node = node;
+		}
+	}
+
+	if (start != end) {
+		do {
+			node = start;
+			start = start->parent;
+			if (unlikely(root->enter_node == node))
+				root->enter_node = end;
+			root->free(node);
+		} while (start != end);
+
+		/*
+		 * Note that shrink may free "end", so enter_node still needs
+		 * to be checked inside.
+		 */
+		sradix_tree_shrink(root);
+	} else if (node->count == root->stores_size - 1) {
+		/* It WAS a full leaf node. Update the ancestors */
+		node = node->parent;
+		while (node) {
+			node->fulls--;
+			if (node->fulls != root->stores_size - 1)
+				break;
+
+			node = node->parent;
+		}
+	}
+}
+
+void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return NULL;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return NULL;
+
+		shift -= root->shift;
+	} while (shift >= 0);
+
+	return node;
+}
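
To make the walk above concrete, here is how a single index decomposes into
per-level offsets for a hypothetical tree with root->shift == 8 (so the mask
is 0xff) and height == 3; the index value is arbitrary:

	unsigned long index = 0x12345;
	unsigned int l2 = (index >> 16) & 0xff; /* 0x01: offset in the root node     */
	unsigned int l1 = (index >> 8)  & 0xff; /* 0x23: offset one level down       */
	unsigned int l0 =  index        & 0xff; /* 0x45: offset into the leaf stores */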
+
+/*
+ * Return the item if it exists, otherwise create it in place
+ * and return the created item.
+ */
+void *sradix_tree_lookup_create(struct sradix_tree_root *root, 
+			unsigned long index, void *(*item_alloc)(void))
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node, *tmp;
+	void *item;
+	int shift, error;
+
+	if (root->rnode == NULL || (index >> (root->shift * root->height))) {
+		if (item_alloc) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return NULL;
+		} else {
+			return NULL;
+		}
+	}
+
+	node = root->rnode;
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		if (!node->stores[offset]) {
+			if (!(tmp = root->alloc()))
+				return NULL;
+
+			tmp->height = shift / root->shift;
+			node->stores[offset] = tmp;
+			tmp->parent = node;
+			node->count++;
+			node = tmp;
+		} else {
+			node = node->stores[offset];
+		}
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	BUG_ON(node->height != 1);
+	offset = index & root->mask;
+	if (node->stores[offset]) {
+		return node->stores[offset];
+	} else if (item_alloc) {
+		if (!(item = item_alloc()))
+			return NULL;
+
+		node->stores[offset] = item;
+
+		/*
+		 * NOTE: we do NOT call root->assign() here, since the item we
+		 * just created carries no meaning yet. The caller can invoke
+		 * it afterwards if necessary.
+		 */
+
+		node->count++;
+		root->num++;
+
+		while (sradix_node_full(root, node)) {
+			node = node->parent;
+			if (!node)
+				break;
+
+			node->fulls++;
+		}
+
+		if (unlikely(!node)) {
+			/* All nodes are full */
+			root->min = 1 << (root->height * root->shift);
+		} else {
+			if (root->min == index) {
+				root->min |= (1UL << (node->height - 1)) - 1;
+				root->min++;
+				root->enter_node = node;
+			}
+		}
+
+		return item;
+	} else {
+		return NULL;
+	}
+
+}
+
+int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return -ENOENT;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return -ENOENT;
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	offset = index & root->mask;
+	if (!node->stores[offset])
+		return -ENOENT;
+
+	sradix_tree_delete_from_leaf(root, node, index);
+
+	return 0;
+}
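
A minimal usage sketch of this API, assuming the linux/sradix-tree.h header
added elsewhere in this patch; the node layout with an inline stores[] array
mirrors the slot_tree_node wiring further down in mm/uksm.c, and EX_SHIFT is
just an example value:

	#include <linux/sradix-tree.h>
	#include <linux/slab.h>

	#define EX_SHIFT 4	/* 16 stores per node */

	struct ex_node {
		struct sradix_tree_node snode;
		void *stores[1 << EX_SHIFT];
	};

	static struct sradix_tree_node *ex_alloc(void)
	{
		struct ex_node *n = kzalloc(sizeof(*n), GFP_KERNEL);

		return n ? &n->snode : NULL;
	}

	static void ex_free(struct sradix_tree_node *node)
	{
		kfree(container_of(node, struct ex_node, snode));
	}

	static struct sradix_tree_root ex_root;

	static void ex_demo(void *item)
	{
		init_sradix_tree_root(&ex_root, EX_SHIFT);
		ex_root.alloc = ex_alloc;
		ex_root.free  = ex_free;

		sradix_tree_enter(&ex_root, &item, 1);	/* stored at the lowest free index (0) */
		sradix_tree_lookup(&ex_root, 0);	/* returns item */
	}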
diff -Nur linux-4.2/mm/Kconfig linux-4.2-pck/mm/Kconfig
--- linux-4.2/mm/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/Kconfig	2015-09-08 00:51:03.460668141 -0300
@@ -340,6 +340,32 @@
 	  See Documentation/vm/ksm.txt for more information: KSM is inactive
 	  until a program has madvised that an area is MADV_MERGEABLE, and
 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
+choice
+	prompt "Choose UKSM/KSM strategy"
+	default UKSM
+	depends on KSM
+	help
+	  This option allows you to select a UKSM/KSM strategy.
+
+config UKSM
+	bool "Ultra-KSM for page merging"
+	depends on KSM
+	help
+	  UKSM is inspired by the Linux kernel's KSM (Kernel Samepage
+	  Merging), but has a fundamentally rewritten core algorithm. With
+	  this advanced algorithm, UKSM can transparently scan all anonymously
+	  mapped user space memory with significantly improved scan speed and
+	  CPU efficiency. Since KVM is friendly to KSM, KVM can also benefit
+	  from UKSM. UKSM has had its first stable release and its first
+	  real-world enterprise user. For more information, see the project
+	  page at www.kerneldedup.org.
+
+config KSM_LEGACY
+	bool "Legacy KSM implementation"
+	depends on KSM
+	help
+	  The legacy KSM implementation from Red Hat.
+endchoice
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
diff -Nur linux-4.2/mm/Makefile linux-4.2-pck/mm/Makefile
--- linux-4.2/mm/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/Makefile	2015-09-08 00:51:03.464001315 -0300
@@ -47,7 +47,8 @@
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
-obj-$(CONFIG_KSM) += ksm.o
+obj-$(CONFIG_KSM_LEGACY) += ksm.o
+obj-$(CONFIG_UKSM) += uksm.o
 obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
diff -Nur linux-4.2/mm/debug.c linux-4.2-pck/mm/debug.c
--- linux-4.2/mm/debug.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/debug.c	2015-09-08 11:20:32.533675385 -0300
@@ -48,6 +48,12 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	{1UL << PG_compound_lock,	"compound_lock"	},
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+	{1UL << PG_toi_untracked,	"toi_untracked"	},
+	{1UL << PG_toi_ro,		"toi_ro"	},
+	{1UL << PG_toi_cbw,		"toi_cbw"	},
+	{1UL << PG_toi_dirty,		"toi_dirty"	},
+#endif
 };
 
 static void dump_flags(unsigned long flags,
diff -Nur linux-4.2/mm/memory.c linux-4.2-pck/mm/memory.c
--- linux-4.2/mm/memory.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/memory.c	2015-09-08 00:53:25.270528186 -0300
@@ -120,6 +120,28 @@
 
 EXPORT_SYMBOL(zero_pfn);
 
+#ifdef CONFIG_UKSM
+unsigned long uksm_zero_pfn __read_mostly;
+EXPORT_SYMBOL_GPL(uksm_zero_pfn);
+struct page *empty_uksm_zero_page;
+
+static int __init setup_uksm_zero_page(void)
+{
+	unsigned long addr;
+	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 0);
+	if (!addr)
+		panic("Oh boy, that early out of memory?");
+
+	empty_uksm_zero_page = virt_to_page((void *) addr);
+	SetPageReserved(empty_uksm_zero_page);
+
+	uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page);
+
+	return 0;
+}
+core_initcall(setup_uksm_zero_page);
+#endif
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -131,6 +153,7 @@
 core_initcall(init_zero_pfn);
 
 
+
 #if defined(SPLIT_RSS_COUNTING)
 
 void sync_mm_rss(struct mm_struct *mm)
@@ -877,6 +900,11 @@
 			rss[MM_ANONPAGES]++;
 		else
 			rss[MM_FILEPAGES]++;
+
+		/* Should return NULL in vm_normal_page() */
+		uksm_bugon_zeropage(pte);
+	} else {
+		uksm_map_zero_page(pte);
 	}
 
 out_set_pte:
@@ -1110,8 +1138,10 @@
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
-			if (unlikely(!page))
+			if (unlikely(!page)) {
+				uksm_unmap_zero_page(ptent);
 				continue;
+			}
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
@@ -1944,8 +1974,10 @@
 			clear_page(kaddr);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(dst);
-	} else
+	} else {
 		copy_user_highpage(dst, src, va, vma);
+		uksm_cow_page(vma, src);
+	}
 }
 
 /*
@@ -2075,6 +2107,7 @@
 		new_page = alloc_zeroed_user_highpage_movable(vma, address);
 		if (!new_page)
 			goto oom;
+		uksm_cow_pte(vma, orig_pte);
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!new_page)
@@ -2099,7 +2132,9 @@
 				dec_mm_counter_fast(mm, MM_FILEPAGES);
 				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
+			uksm_bugon_zeropage(orig_pte);
 		} else {
+			uksm_unmap_zero_page(orig_pte);
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
diff -Nur linux-4.2/mm/mmap.c linux-4.2-pck/mm/mmap.c
--- linux-4.2/mm/mmap.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/mmap.c	2015-09-08 00:51:03.464001315 -0300
@@ -41,6 +41,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/printk.h>
+#include <linux/ksm.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -276,6 +277,7 @@
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	uksm_remove_vma(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 	return next;
 }
@@ -733,9 +735,16 @@
 	long adjust_next = 0;
 	int remove_next = 0;
 
+/*
+ * To avoid deadlock, uksm_remove_vma() must be called before any spinlock
+ * is acquired.
+ */
+	uksm_remove_vma(vma);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL;
 
+		uksm_remove_vma(next);
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -829,6 +838,7 @@
 		end_changed = true;
 	}
 	vma->vm_pgoff = pgoff;
+
 	if (adjust_next) {
 		next->vm_start += adjust_next << PAGE_SHIFT;
 		next->vm_pgoff += adjust_next;
@@ -899,16 +909,22 @@
 		 * up the code too much to do both in one go.
 		 */
 		next = vma->vm_next;
-		if (remove_next == 2)
+		if (remove_next == 2) {
+			uksm_remove_vma(next);
 			goto again;
-		else if (next)
+		} else if (next) {
 			vma_gap_update(next);
-		else
+		} else {
 			mm->highest_vm_end = end;
+		}
+	} else {
+		if (next && !insert)
+			uksm_vma_add_new(next);
 	}
 	if (insert && file)
 		uprobe_mmap(insert);
 
+	uksm_vma_add_new(vma);
 	validate_mm(mm);
 
 	return 0;
@@ -1301,6 +1317,9 @@
 	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
+	/* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */
+	uksm_vm_flags_mod(&vm_flags);
+
 	if (flags & MAP_LOCKED)
 		if (!can_do_mlock())
 			return -EPERM;
@@ -1642,6 +1661,7 @@
 			allow_write_access(file);
 	}
 	file = vma->vm_file;
+	uksm_vma_add_new(vma);
 out:
 	perf_event_mmap(vma);
 
@@ -1683,6 +1703,7 @@
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
+	uksm_remove_vma(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
 	if (charged)
@@ -2484,6 +2505,8 @@
 	else
 		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
+	uksm_vma_add_new(new);
+
 	/* Success. */
 	if (!err)
 		return 0;
@@ -2721,6 +2744,7 @@
 		return addr;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+	uksm_vm_flags_mod(&flags);
 
 	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
 	if (error & ~PAGE_MASK)
@@ -2778,6 +2802,7 @@
 	vma->vm_flags = flags;
 	vma->vm_page_prot = vm_get_page_prot(flags);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
+	uksm_vma_add_new(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -2813,6 +2838,12 @@
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
 
+	/*
+	 * Taking write lock on mmap_sem does not harm others,
+	 * but it's crucial for uksm to avoid races.
+	 */
+	down_write(&mm->mmap_sem);
+
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
@@ -2848,6 +2879,11 @@
 		vma = remove_vma(vma);
 	}
 	vm_unacct_memory(nr_accounted);
+
+	mm->mmap = NULL;
+	mm->mm_rb = RB_ROOT;
+	vmacache_invalidate(mm);
+	up_write(&mm->mmap_sem);
 }
 
 /* Insert vm structure into process list sorted by address
@@ -2954,6 +2990,7 @@
 				new_vma->vm_ops->open(new_vma);
 			vma_link(mm, new_vma, prev, rb_link, rb_parent);
 			*need_rmap_locks = false;
+			uksm_vma_add_new(new_vma);
 		}
 	}
 	return new_vma;
@@ -3067,10 +3104,10 @@
 	ret = insert_vm_struct(mm, vma);
 	if (ret)
 		goto out;
-
 	mm->total_vm += len >> PAGE_SHIFT;
 
 	perf_event_mmap(vma);
+	uksm_vma_add_new(vma);
 
 	return vma;
 
diff -Nur linux-4.2/mm/page-writeback.c linux-4.2-pck/mm/page-writeback.c
--- linux-4.2/mm/page-writeback.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/page-writeback.c	2015-09-08 00:48:22.241696581 -0300
@@ -70,13 +70,21 @@
 /*
  * Start background writeback (via writeback threads) at this percentage
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+int dirty_background_ratio;
+#else
 int dirty_background_ratio = 10;
+#endif
 
 /*
  * dirty_background_bytes starts at 0 (disabled) so that it is a function of
  * dirty_background_ratio * the amount of dirtyable memory
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned long dirty_background_bytes = 128 * 1024 * 1024;
+#else
 unsigned long dirty_background_bytes;
+#endif
 
 /*
  * free highmem will not be subtracted from the total free memory
@@ -87,13 +95,21 @@
 /*
  * The generator of dirty data starts writeback at this percentage
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+int vm_dirty_ratio;
+#else
 int vm_dirty_ratio = 20;
+#endif
 
 /*
  * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
  * vm_dirty_ratio * the amount of dirtyable memory
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned long vm_dirty_bytes = 256 * 1024 * 1024;
+#else
 unsigned long vm_dirty_bytes;
+#endif
 
 /*
  * The interval between `kupdate'-style writebacks
diff -Nur linux-4.2/mm/page_alloc.c linux-4.2-pck/mm/page_alloc.c
--- linux-4.2/mm/page_alloc.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/page_alloc.c	2015-09-08 11:20:32.543674897 -0300
@@ -61,6 +61,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
+#include <linux/tuxonice.h>
 #include <linux/kthread.h>
 
 #include <asm/sections.h>
@@ -726,6 +727,12 @@
 	if (unlikely(page->mem_cgroup))
 		bad_reason = "page still charged to cgroup";
 #endif
+	if (unlikely(PageTOI_Untracked(page))) {
+		/* Make it writable and included in image if allocated. */
+		ClearPageTOI_Untracked(page);
+		/* If it gets allocated, it will be dirty from TOI's POV. */
+		SetPageTOI_Dirty(page);
+	}
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -1324,6 +1331,11 @@
 		struct page *p = page + i;
 		if (unlikely(check_new_page(p)))
 			return 1;
+		if (unlikely(toi_incremental_support() &&
+			     (gfp_flags & ___GFP_TOI_NOTRACK))) {
+			/*
+			 * Make the page writable if it's protected, and set
+			 * it to be untracked.
+			 */
+			SetPageTOI_Untracked(p);
+			toi_make_writable(init_mm.pgd, (unsigned long) page_address(p));
+		}
 	}
 
 	set_page_private(page, 0);
diff -Nur linux-4.2/mm/percpu.c linux-4.2-pck/mm/percpu.c
--- linux-4.2/mm/percpu.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/percpu.c	2015-09-08 11:20:32.543674897 -0300
@@ -125,6 +125,7 @@
 static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
+static int pcpu_pfns;
 
 /* cpus with the lowest and highest unit addresses */
 static unsigned int pcpu_low_unit_cpu __read_mostly;
@@ -1795,6 +1796,7 @@
 	/* calculate size_sum and ensure dyn_size is enough for early alloc */
 	size_sum = PFN_ALIGN(static_size + reserved_size +
 			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+        pcpu_pfns = PFN_DOWN(size_sum);
 	dyn_size = size_sum - static_size - reserved_size;
 
 	/*
@@ -2282,6 +2284,22 @@
 	}
 }
 
+#ifdef CONFIG_TOI_INCREMENTAL
+/*
+ * It doesn't matter if we mark an extra page as untracked (and therefore
+ * always save it in incremental images).
+ */
+void toi_mark_per_cpus_pages_untracked(void)
+{
+    int i;
+
+    struct page *page = virt_to_page(pcpu_base_addr);
+
+    for (i = 0; i < pcpu_pfns; i++)
+        SetPageTOI_Untracked(page + i);
+}
+#endif
+
 /*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
diff -Nur linux-4.2/mm/rmap.c linux-4.2-pck/mm/rmap.c
--- linux-4.2/mm/rmap.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/rmap.c	2015-09-08 00:51:03.464001315 -0300
@@ -962,9 +962,9 @@
 
 /**
  * __page_set_anon_rmap - set up new anonymous rmap
- * @page:	Page to add to rmap	
+ * @page:	Page to add to rmap
  * @vma:	VM area to add page to.
- * @address:	User virtual address of the mapping	
+ * @address:	User virtual address of the mapping
  * @exclusive:	the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
diff -Nur linux-4.2/mm/shmem.c linux-4.2-pck/mm/shmem.c
--- linux-4.2/mm/shmem.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/shmem.c	2015-09-08 11:20:32.543674897 -0300
@@ -1396,7 +1396,7 @@
 }
 
 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
-				     umode_t mode, dev_t dev, unsigned long flags)
+				     umode_t mode, dev_t dev, unsigned long flags, int atomic_copy)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -1417,6 +1417,8 @@
 		spin_lock_init(&info->lock);
 		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
+		if (atomic_copy)
+			inode->i_flags |= S_ATOMIC_COPY;
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
@@ -2206,7 +2208,7 @@
 	struct inode *inode;
 	int error = -ENOSPC;
 
-	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0);
 	if (inode) {
 		error = simple_acl_create(dir, inode);
 		if (error)
@@ -2235,7 +2237,7 @@
 	struct inode *inode;
 	int error = -ENOSPC;
 
-	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE, 0);
 	if (inode) {
 		error = security_inode_init_security(inode, dir,
 						     NULL,
@@ -2428,7 +2430,7 @@
 	if (len > PAGE_CACHE_SIZE)
 		return -ENAMETOOLONG;
 
-	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0);
 	if (!inode)
 		return -ENOSPC;
 
@@ -2946,7 +2948,7 @@
 		goto err_name;
 	}
 
-	file = shmem_file_setup(name, 0, VM_NORESERVE);
+	file = shmem_file_setup(name, 0, VM_NORESERVE, 0);
 	if (IS_ERR(file)) {
 		error = PTR_ERR(file);
 		goto err_fd;
@@ -3037,7 +3039,7 @@
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
-	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0);
 	if (!inode)
 		goto failed;
 	inode->i_uid = sbinfo->uid;
@@ -3291,7 +3293,7 @@
 
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy)	ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
 
@@ -3304,7 +3306,8 @@
 };
 
 static struct file *__shmem_file_setup(const char *name, loff_t size,
-				       unsigned long flags, unsigned int i_flags)
+				       unsigned long flags, unsigned int i_flags,
+				       int atomic_copy)
 {
 	struct file *res;
 	struct inode *inode;
@@ -3333,7 +3336,7 @@
 	d_set_d_op(path.dentry, &anon_ops);
 
 	res = ERR_PTR(-ENOSPC);
-	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
+	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy);
 	if (!inode)
 		goto put_memory;
 
@@ -3369,9 +3372,9 @@
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
  */
-struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-	return __shmem_file_setup(name, size, flags, S_PRIVATE);
+	return __shmem_file_setup(name, size, flags, S_PRIVATE, atomic_copy);
 }
 
 /**
@@ -3380,9 +3383,9 @@
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
  */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy)
 {
-	return __shmem_file_setup(name, size, flags, 0);
+	return __shmem_file_setup(name, size, flags, 0, atomic_copy);
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
@@ -3401,7 +3404,7 @@
 	 * accessible to the user through its mapping, use S_PRIVATE flag to
 	 * bypass file security, in the same way as shmem_kernel_file_setup().
 	 */
-	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
+	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE, 0);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff -Nur linux-4.2/mm/slub.c linux-4.2-pck/mm/slub.c
--- linux-4.2/mm/slub.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/slub.c	2015-09-08 11:20:32.547008068 -0300
@@ -1315,7 +1315,7 @@
 	struct page *page;
 	int order = oo_order(oo);
 
-	flags |= __GFP_NOTRACK;
+	flags |= (__GFP_NOTRACK | ___GFP_TOI_NOTRACK);
 
 	if (memcg_charge_slab(s, flags, order))
 		return NULL;
@@ -3331,7 +3331,7 @@
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK;
+	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_TOI_NOTRACK;
 	page = alloc_kmem_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
diff -Nur linux-4.2/mm/swapfile.c linux-4.2-pck/mm/swapfile.c
--- linux-4.2/mm/swapfile.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/swapfile.c	2015-09-08 11:20:32.550341239 -0300
@@ -9,6 +9,7 @@
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/vmalloc.h>
@@ -43,7 +44,6 @@
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 				 unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
-static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -722,6 +722,60 @@
 	return (swp_entry_t) {0};
 }
 
+static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+					unsigned int prev, bool frontswap);
+
+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end,
+		unsigned int limit)
+{
+	struct swap_info_struct *si;
+	pgoff_t start_at;
+	unsigned int i;
+
+	*start = swp_entry(0, 0);
+	*end = swp_entry(0, 0);
+	si = swap_info[type];
+	spin_lock(&si->lock);
+	if (si && (si->flags & SWP_WRITEOK)) {
+		atomic_long_dec(&nr_swap_pages);
+		/* This is called for allocating swap entry, not cache */
+		start_at = scan_swap_map(si, 1);
+		if (start_at) {
+			unsigned long stop_at = find_next_to_unuse(si, start_at, 0);
+			if (stop_at > start_at)
+				stop_at--;
+			else
+				stop_at = si->max - 1;
+			if (stop_at - start_at + 1 > limit)
+				stop_at = min_t(unsigned int,
+						start_at + limit - 1,
+						si->max - 1);
+			/* Mark them used */
+			for (i = start_at; i <= stop_at; i++)
+				si->swap_map[i] = 1;
+			/* first page already done above */
+			si->inuse_pages += stop_at - start_at;
+
+			atomic_long_sub(stop_at - start_at, &nr_swap_pages);
+			if (start_at == si->lowest_bit)
+				si->lowest_bit = stop_at + 1;
+			if (stop_at == si->highest_bit)
+				si->highest_bit = start_at - 1;
+			if (si->inuse_pages == si->pages) {
+				si->lowest_bit = si->max;
+				si->highest_bit = 0;
+			}
+			for (i = start_at + 1; i <= stop_at; i++)
+				inc_cluster_info_page(si, si->cluster_info, i);
+			si->cluster_next = stop_at + 1;
+			*start = swp_entry(type, start_at);
+			*end = swp_entry(type, stop_at);
+		} else
+			atomic_long_inc(&nr_swap_pages);
+	}
+	spin_unlock(&si->lock);
+}
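
A sketch of a hypothetical caller (presumably the TuxOnIce swap code added by
this patch is the real consumer; the swap type and limit below are arbitrary):

	swp_entry_t first, last;
	unsigned long nr = 0;

	get_swap_range_of_type(0, &first, &last, 256);	/* type 0, at most 256 slots */
	if (swp_offset(first))				/* (0,0) means nothing was allocated */
		nr = swp_offset(last) - swp_offset(first) + 1;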
+
 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -1576,7 +1630,7 @@
  * Note that the type of this function is sector_t, but it returns page offset
  * into the bdev, not sector offset.
  */
-static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
 	struct swap_info_struct *sis;
 	struct swap_extent *start_se;
@@ -2721,8 +2775,14 @@
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	return swp_offset(swap);
 }
+
 EXPORT_SYMBOL_GPL(__page_file_index);
 
+struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+	return swap_info[type];
+}
+
 /*
  * add_swap_count_continuation - called when a swap count is duplicated
  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
diff -Nur linux-4.2/mm/uksm.c linux-4.2-pck/mm/uksm.c
--- linux-4.2/mm/uksm.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/mm/uksm.c	2015-09-08 00:51:03.470667662 -0300
@@ -0,0 +1,5525 @@
+/*
+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
+ *
+ * This is an improvement upon KSM. Some basic data structures and routines
+ * are borrowed from ksm.c .
+ *
+ * Its new features:
+ * 1. Full system scan:
+ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
+ *      interaction to submit a memory area to KSM is no longer needed.
+ *
+ * 2. Rich area detection:
+ *      It automatically detects rich areas containing abundant duplicated
+ *      pages based. Rich areas are given a full scan speed. Poor areas are
+ *      sampled at a reasonable speed with very low CPU consumption.
+ *
+ * 3. Ultra Per-page scan speed improvement:
+ *      A new hash algorithm is proposed. As a result, on a machine with
+ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHz DDR2 main memory, it
+ *      can scan memory areas that do not contain duplicated pages at speeds of
+ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at speeds of
+ *      477MB/sec ~ 923MB/sec.
+ *
+ * 4. Thrashing area avoidance:
+ *      Thrashing areas (VMAs that have frequent KSM page break-outs) can be
+ *      filtered out. My benchmark shows this is more efficient than KSM's
+ *      per-page hash-value-based volatile page detection.
+ *
+ *
+ * 5. Misc changes upon KSM:
+ *      * It has a fully x86-optimized memcmp dedicated to 4-byte-aligned page
+ *        comparison. It's much faster than the default C version on x86.
+ *      * rmap_item now has a struct page * member to loosely cache an
+ *        address-->page mapping, which avoids many costly calls to
+ *        follow_page().
+ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
+ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
+ *        ksm is needed for this case.
+ *
+ * 6. Full Zero Page consideration (contributed by Figo Zhang)
+ *    uksmd now considers full zero pages as special pages and merges them
+ *    into a special unswappable uksm zero page.
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/spinlock.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/memory.h>
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+#include <linux/ksm.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/hash.h>
+#include <linux/random.h>
+#include <linux/math64.h>
+#include <linux/gcd.h>
+#include <linux/freezer.h>
+#include <linux/sradix-tree.h>
+
+#include <asm/tlbflush.h>
+#include "internal.h"
+
+#ifdef CONFIG_X86
+#undef memcmp
+
+#ifdef CONFIG_X86_32
+#define memcmp memcmpx86_32
+/*
+ * Compare the 4-byte-aligned addresses s1 and s2, of length n bytes.
+ */
+int memcmpx86_32(void *s1, void *s2, size_t n)
+{
+	size_t num = n / 4;
+	register int res;
+
+	__asm__ __volatile__
+	(
+	 "testl %3,%3\n\t"
+	 "repe; cmpsd\n\t"
+	 "je        1f\n\t"
+	 "sbbl      %0,%0\n\t"
+	 "orl       $1,%0\n"
+	 "1:"
+	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
+	 : "0" (0)
+	 : "cc");
+
+	return res;
+}
+
+/*
+ * Check whether the page is all zeros.
+ */
+static int is_full_zero(const void *s1, size_t len)
+{
+	unsigned char same;
+
+	len /= 4;
+
+	__asm__ __volatile__
+	("repe; scasl;"
+	 "sete %0"
+	 : "=qm" (same), "+D" (s1), "+c" (len)
+	 : "a" (0)
+	 : "cc");
+
+	return same;
+}
+
+
+#elif defined(CONFIG_X86_64)
+#define memcmp memcmpx86_64
+/*
+ * Compare the 8-byte-aligned addresses s1 and s2, of length n bytes.
+ */
+int memcmpx86_64(void *s1, void *s2, size_t n)
+{
+	size_t num = n / 8;
+	register int res;
+
+	__asm__ __volatile__
+	(
+	 "testq %q3,%q3\n\t"
+	 "repe; cmpsq\n\t"
+	 "je        1f\n\t"
+	 "sbbq      %q0,%q0\n\t"
+	 "orq       $1,%q0\n"
+	 "1:"
+	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
+	 : "0" (0)
+	 : "cc");
+
+	return res;
+}
+
+static int is_full_zero(const void *s1, size_t len)
+{
+	unsigned char same;
+
+	len /= 8;
+
+	__asm__ __volatile__
+	("repe; scasq;"
+	 "sete %0"
+	 : "=qm" (same), "+D" (s1), "+c" (len)
+	 : "a" (0)
+	 : "cc");
+
+	return same;
+}
+
+#endif
+#else
+static int is_full_zero(const void *s1, size_t len)
+{
+	const unsigned long *src = s1;
+	size_t i;
+
+	len /= sizeof(*src);
+
+	for (i = 0; i < len; i++) {
+		if (src[i])
+			return 0;
+	}
+
+	return 1;
+}
+#endif
+
+#define UKSM_RUNG_ROUND_FINISHED  (1 << 0)
+#define TIME_RATIO_SCALE	10000
+
+#define SLOT_TREE_NODE_SHIFT	8
+#define SLOT_TREE_NODE_STORE_SIZE	(1UL << SLOT_TREE_NODE_SHIFT)
+struct slot_tree_node {
+	unsigned long size;
+	struct sradix_tree_node snode;
+	void *stores[SLOT_TREE_NODE_STORE_SIZE];
+};
+
+static struct kmem_cache *slot_tree_node_cachep;
+
+static struct sradix_tree_node *slot_tree_node_alloc(void)
+{
+	struct slot_tree_node *p;
+	p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	return &p->snode;
+}
+
+static void slot_tree_node_free(struct sradix_tree_node *node)
+{
+	struct slot_tree_node *p;
+
+	p = container_of(node, struct slot_tree_node, snode);
+	kmem_cache_free(slot_tree_node_cachep, p);
+}
+
+static void slot_tree_node_extend(struct sradix_tree_node *parent,
+				  struct sradix_tree_node *child)
+{
+	struct slot_tree_node *p, *c;
+
+	p = container_of(parent, struct slot_tree_node, snode);
+	c = container_of(child, struct slot_tree_node, snode);
+
+	p->size += c->size;
+}
+
+void slot_tree_node_assign(struct sradix_tree_node *node,
+			   unsigned index, void *item)
+{
+	struct vma_slot *slot = item;
+	struct slot_tree_node *cur;
+
+	slot->snode = node;
+	slot->sindex = index;
+
+	while (node) {
+		cur = container_of(node, struct slot_tree_node, snode);
+		cur->size += slot->pages;
+		node = node->parent;
+	}
+}
+
+void slot_tree_node_rm(struct sradix_tree_node *node, unsigned offset)
+{
+	struct vma_slot *slot;
+	struct slot_tree_node *cur;
+	unsigned long pages;
+
+	if (node->height == 1) {
+		slot = node->stores[offset];
+		pages = slot->pages;
+	} else {
+		cur = container_of(node->stores[offset],
+				   struct slot_tree_node, snode);
+		pages = cur->size;
+	}
+
+	while (node) {
+		cur = container_of(node, struct slot_tree_node, snode);
+		cur->size -= pages;
+		node = node->parent;
+	}
+}
+
+unsigned long slot_iter_index;
+int slot_iter(void *item,  unsigned long height)
+{
+	struct slot_tree_node *node;
+	struct vma_slot *slot;
+
+	if (height == 1) {
+		slot = item;
+		if (slot_iter_index < slot->pages) {
+			/*in this one*/
+			return 1;
+		} else {
+			slot_iter_index -= slot->pages;
+			return 0;
+		}
+
+	} else {
+		node = container_of(item, struct slot_tree_node, snode);
+		if (slot_iter_index < node->size) {
+			/*in this one*/
+			return 1;
+		} else {
+			slot_iter_index -= node->size;
+			return 0;
+		}
+	}
+}
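+
+/*
+ * Note on the slot tree: every sradix tree node caches in ->size the total
+ * number of pages stored beneath it (see slot_tree_node_assign/rm above).
+ * slot_iter() uses those cached sizes to walk down from the root: at each
+ * level it either descends into the child that covers slot_iter_index or
+ * subtracts that child's page count and moves on, so a slot can be located
+ * by a virtual page index in O(tree height) steps.
+ */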
+
+
+static inline void slot_tree_init_root(struct sradix_tree_root *root)
+{
+	init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT);
+	root->alloc = slot_tree_node_alloc;
+	root->free = slot_tree_node_free;
+	root->extend = slot_tree_node_extend;
+	root->assign = slot_tree_node_assign;
+	root->rm = slot_tree_node_rm;
+}
+
+void slot_tree_init(void)
+{
+	slot_tree_node_cachep = kmem_cache_create("slot_tree_node",
+				sizeof(struct slot_tree_node), 0,
+				SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
+				NULL);
+}
+
+
+/* Each rung of this ladder is a list of VMAs having the same scan ratio */
+struct scan_rung {
+	//struct list_head scanned_list;
+	struct sradix_tree_root vma_root;
+	struct sradix_tree_root vma_root2;
+
+	struct vma_slot *current_scan;
+	unsigned long current_offset;
+
+	/*
+	 * The initial value for current_offset; it should loop over
+	 * [0, step - 1] so that every slot gets its chance to be scanned.
+	 */
+	unsigned long offset_init;
+	unsigned long step; /* dynamic step for current_offset */
+	unsigned int flags;
+	unsigned long pages_to_scan;
+	//unsigned long fully_scanned_slots;
+	/*
+	 * A little bit tricky - if cpu_time_ratio > 0, then the value is
+	 * the cpu time ratio this rung can spend for every scan
+	 * period. If < 0, then it is the cpu time ratio relative to the
+	 * max cpu percentage the user specified. Both are in units of
+	 * 1/TIME_RATIO_SCALE, so e.g. 20 means 0.2% of cpu time and
+	 * -2500 means 25% of uksm_max_cpu_percentage.
+	 */
+	int cpu_ratio;
+
+	/*
+	 * How long will it take for all slots in this rung to be fully
+	 * scanned? If it's zero, we don't care about the cover time:
+	 * it's fully scanned.
+	 */
+	unsigned int cover_msecs;
+	//unsigned long vma_num;
+	//unsigned long pages; /* Sum of all slot's pages in rung */
+};
+
+/**
+ * node of either the stable or unstable rbtree
+ */
+struct tree_node {
+	struct rb_node node; /* link in the main (un)stable rbtree */
+	struct rb_root sub_root; /* rb_root for sublevel collision rbtree */
+	u32 hash;
+	unsigned long count; /* TODO: merged with sub_root */
+	struct list_head all_list; /* all tree nodes in stable/unstable tree */
+};
+
+/**
+ * struct stable_node - node of the stable rbtree
+ * @node: rb node of this ksm page in the stable tree
+ * @hlist: hlist head of rmap_items using this ksm page
+ * @kpfn: page frame number of this ksm page
+ */
+struct stable_node {
+	struct rb_node node; /* link in sub-rbtree */
+	struct tree_node *tree_node; /* its tree_node in the stable tree, NULL if it's on the hell list */
+	struct hlist_head hlist;
+	unsigned long kpfn;
+	u32 hash_max; /* if == 0, it has not been calculated yet */
+	struct list_head all_list; /* in a list for all stable nodes */
+};
+
+/**
+ * struct node_vma - group rmap_items linked in a same stable
+ * node together.
+ */
+struct node_vma {
+	union {
+		struct vma_slot *slot;
+		unsigned long key;  /* slot is used as key sorted on hlist */
+	};
+	struct hlist_node hlist;
+	struct hlist_head rmap_hlist;
+	struct stable_node *head;
+};
+
+/**
+ * struct rmap_item - reverse mapping item for virtual addresses
+ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
+ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
+ * @mm: the memory structure this rmap_item is pointing into
+ * @address: the virtual address this rmap_item tracks (+ flags in low bits)
+ * @node: rb node of this rmap_item in the unstable tree
+ * @head: pointer to stable_node heading this list in the stable tree
+ * @hlist: link into hlist of rmap_items hanging off that stable_node
+ */
+struct rmap_item {
+	struct vma_slot *slot;
+	struct page *page;
+	unsigned long address;	/* + low bits used for flags below */
+	unsigned long hash_round;
+	unsigned long entry_index;
+	union {
+		struct {/* when in unstable tree */
+			struct rb_node node;
+			struct tree_node *tree_node;
+			u32 hash_max;
+		};
+		struct { /* when in stable tree */
+			struct node_vma *head;
+			struct hlist_node hlist;
+			struct anon_vma *anon_vma;
+		};
+	};
+} __attribute__((aligned(4)));
+
+struct rmap_list_entry {
+	union {
+		struct rmap_item *item;
+		unsigned long addr;
+	};
+	/* lowest bit is used for is_addr tag */
+} __attribute__((aligned(4))); /* 4-byte aligned to fit into pages */
+
+
+/* Basic data structure definition ends */
+
+
+/*
+ * Flags for rmap_item to judge if it's listed in the stable/unstable tree.
+ * The flags use the low bits of rmap_item.address
+ */
+#define UNSTABLE_FLAG	0x1
+#define STABLE_FLAG	0x2
+#define get_rmap_addr(x)	((x)->address & PAGE_MASK)
+
+/*
+ * rmap_list_entry helpers
+ */
+#define IS_ADDR_FLAG	1
+#define is_addr(ptr)		((unsigned long)(ptr) & IS_ADDR_FLAG)
+#define set_is_addr(ptr)	((ptr) |= IS_ADDR_FLAG)
+#define get_clean_addr(ptr)	(((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG))
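+
+/*
+ * Illustration of the tagging trick above: both struct rmap_item and
+ * struct rmap_list_entry are at least 4-byte aligned, so bit 0 of a pointer
+ * to them is always zero.  An rmap_list_entry can therefore hold either a
+ * real rmap_item pointer (bit 0 clear) or a virtual address tagged with
+ * IS_ADDR_FLAG in bit 0, and is_addr()/get_clean_addr() tell the two cases
+ * apart without any extra storage.
+ */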
+
+
+/*
+ * High speed caches for frequently allocated and freed structs
+ */
+static struct kmem_cache *rmap_item_cache;
+static struct kmem_cache *stable_node_cache;
+static struct kmem_cache *node_vma_cache;
+static struct kmem_cache *vma_slot_cache;
+static struct kmem_cache *tree_node_cache;
+#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\
+		sizeof(struct __struct), __alignof__(struct __struct),\
+		(__flags), NULL)
+
+/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */
+#define SCAN_LADDER_SIZE 4
+static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE];
+
+/* The evaluation rounds uksmd has finished */
+static unsigned long long uksm_eval_round = 1;
+
+/*
+ * we add 1 to this var when we consider we should rebuild the whole
+ * unstable tree.
+ */
+static unsigned long uksm_hash_round = 1;
+
+/*
+ * How many times the whole memory is scanned.
+ */
+static unsigned long long fully_scanned_round = 1;
+
+/* The total number of virtual pages of all vma slots */
+static u64 uksm_pages_total;
+
+/* The number of pages that have been scanned since startup */
+static u64 uksm_pages_scanned;
+
+static u64 scanned_virtual_pages;
+
+/* The number of pages that have been scanned since the last encode_benefit call */
+static u64 uksm_pages_scanned_last;
+
+/* If the scanned number grows too large, we encode it here */
+static u64 pages_scanned_stored;
+
+static unsigned long pages_scanned_base;
+
+/* The number of nodes in the stable tree */
+static unsigned long uksm_pages_shared;
+
+/* The number of page slots additionally sharing those nodes */
+static unsigned long uksm_pages_sharing;
+
+/* The number of nodes in the unstable tree */
+static unsigned long uksm_pages_unshared;
+
+/*
+ * Milliseconds ksmd should sleep between scans,
+ * >= 100ms to be consistent with
+ * scan_time_to_sleep_msec()
+ */
+static unsigned int uksm_sleep_jiffies;
+
+/* The real value for the uksmd next sleep */
+static unsigned int uksm_sleep_real;
+
+/* Saved value for user input uksm_sleep_jiffies when it's enlarged */
+static unsigned int uksm_sleep_saved;
+
+/* Max percentage of cpu utilization ksmd can take to scan in one batch */
+static unsigned int uksm_max_cpu_percentage;
+
+static int uksm_cpu_governor;
+
+static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" };
+
+struct uksm_cpu_preset_s {
+	int cpu_ratio[SCAN_LADDER_SIZE];
+	unsigned int cover_msecs[SCAN_LADDER_SIZE];
+	unsigned int max_cpu; /* percentage */
+};
+
+struct uksm_cpu_preset_s uksm_cpu_preset[4] = {
+	{ {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95},
+	{ {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50},
+	{ {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20},
+	{ {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1},
+};
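+
+/*
+ * The four presets above correspond, index for index, to the governors
+ * listed in uksm_cpu_governor_str: "full", "medium", "low" and "quiet".
+ * Each row supplies the per-rung cpu_ratio and cover_msecs values plus the
+ * overall max_cpu percentage, e.g. the "quiet" preset caps uksmd at about
+ * 1% cpu.
+ */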
+
+/* The default value for uksm_ema_page_time if it's not initialized */
+#define UKSM_PAGE_TIME_DEFAULT	500
+
+/* cost to scan one page, tracked as an exponential moving average, in nsecs */
+static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
+
+/* The exponential moving average alpha weight, in percentage. */
+#define EMA_ALPHA	20
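+
+/*
+ * A sketch of the exponential moving average update used for
+ * uksm_ema_page_time (assuming the usual EMA form):
+ *
+ *	new_ema = (EMA_ALPHA * sample + (100 - EMA_ALPHA) * old_ema) / 100
+ *
+ * so with EMA_ALPHA == 20 each new per-page timing sample contributes 20%
+ * and the history keeps the remaining 80%.
+ */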
+
+/*
+ * The threshold used to filter out thrashing areas.
+ * If it is 0, filtering is disabled; otherwise it is the percentage upper
+ * bound of the thrashing ratio of all areas. Any area with a bigger
+ * thrashing ratio will be considered as having a zero duplication ratio.
+ */
+static unsigned int uksm_thrash_threshold = 50;
+
+/* What dedup ratio is considered to be abundant */
+static unsigned int uksm_abundant_threshold = 10;
+
+/* All slots having merged pages in this eval round. */
+struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup);
+
+/* How many times the ksmd has slept since startup */
+static unsigned long long uksm_sleep_times;
+
+#define UKSM_RUN_STOP	0
+#define UKSM_RUN_MERGE	1
+static unsigned int uksm_run = 1;
+
+static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait);
+static DEFINE_MUTEX(uksm_thread_mutex);
+
+/*
+ * List vma_slot_new is for newly created vma_slots waiting to be added by
+ * ksmd. If one cannot be added (e.g. because it's too small), it's moved to
+ * vma_slot_noadd. vma_slot_del is the list for vma_slots whose corresponding
+ * VMA has been removed/freed.
+ */
+struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new);
+struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd);
+struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del);
+static DEFINE_SPINLOCK(vma_slot_list_lock);
+
+/* The unstable tree heads */
+static struct rb_root root_unstable_tree = RB_ROOT;
+
+/*
+ * All tree_nodes are in a list to be freed at once when unstable tree is
+ * freed after each scan round.
+ */
+static struct list_head unstable_tree_node_list =
+				LIST_HEAD_INIT(unstable_tree_node_list);
+
+/* List contains all stable nodes */
+static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list);
+
+/*
+ * When the hash strength is changed, the stable tree must be delta_hashed and
+ * re-structured. We use two sets of the structs below to speed up the
+ * re-structuring of the stable tree.
+ */
+static struct list_head
+stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]),
+			    LIST_HEAD_INIT(stable_tree_node_list[1])};
+
+static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0];
+static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT};
+static struct rb_root *root_stable_treep = &root_stable_tree[0];
+static unsigned long stable_tree_index;
+
+/* The hash strength needed to hash a full page */
+#define HASH_STRENGTH_FULL		(PAGE_SIZE / sizeof(u32))
+
+/* The hash strength needed for loop-back hashing */
+#define HASH_STRENGTH_MAX		(HASH_STRENGTH_FULL + 10)
+
+/* The random offsets in a page */
+static u32 *random_nums;
+
+/* The hash strength */
+static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4;
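+
+/*
+ * For reference, with 4 KiB pages HASH_STRENGTH_FULL is 4096 / 4 = 1024
+ * u32 samples and HASH_STRENGTH_MAX is 1034; the initial hash_strength of
+ * HASH_STRENGTH_FULL >> 4 therefore samples only 64 of the page's 1024
+ * 32-bit words.
+ */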
+
+/* The delta value each time the hash strength increases or decreases */
+static unsigned long hash_strength_delta;
+#define HASH_STRENGTH_DELTA_MAX	5
+
+/* The time we have saved due to random_sample_hash */
+static u64 rshash_pos;
+
+/* The time we have wasted due to hash collision */
+static u64 rshash_neg;
+
+struct uksm_benefit {
+	u64 pos;
+	u64 neg;
+	u64 scanned;
+	unsigned long base;
+} benefit;
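+
+/*
+ * Scaling scheme for the benefit accumulators (see encode_benefit() below):
+ * pos, neg and scanned are stored right-shifted by `base` bits.  Whenever
+ * adding a new delta would overflow a u64, all three accumulators are halved
+ * and base is incremented, so the relative proportions between them are
+ * preserved while the absolute values stay bounded.
+ */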
+
+/*
+ * The relative cost of memcmp, compared to 1 time unit of random sample
+ * hash; this value is measured when the ksm module is initialized
+ */
+static unsigned long memcmp_cost;
+
+static unsigned long  rshash_neg_cont_zero;
+static unsigned long  rshash_cont_obscure;
+
+/* The possible states of hash strength adjustment heuristic */
+enum rshash_states {
+		RSHASH_STILL,
+		RSHASH_TRYUP,
+		RSHASH_TRYDOWN,
+		RSHASH_NEW,
+		RSHASH_PRE_STILL,
+};
+
+/* The possible direction we are about to adjust hash strength */
+enum rshash_direct {
+	GO_UP,
+	GO_DOWN,
+	OBSCURE,
+	STILL,
+};
+
+/* random sampling hash state machine */
+static struct {
+	enum rshash_states state;
+	enum rshash_direct pre_direct;
+	u8 below_count;
+	/* Keep a lookup window of size 5; if above_count/below_count > 3
+	 * in this window, we stop trying.
+	 */
+	u8 lookup_window_index;
+	u64 stable_benefit;
+	unsigned long turn_point_down;
+	unsigned long turn_benefit_down;
+	unsigned long turn_point_up;
+	unsigned long turn_benefit_up;
+	unsigned long stable_point;
+} rshash_state;
+
+/* zero page hash table, indexed by hash_strength [0 ~ HASH_STRENGTH_MAX] */
+static u32 *zero_hash_table;
+
+static inline struct node_vma *alloc_node_vma(void)
+{
+	struct node_vma *node_vma;
+	node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL);
+	if (node_vma) {
+		INIT_HLIST_HEAD(&node_vma->rmap_hlist);
+		INIT_HLIST_NODE(&node_vma->hlist);
+	}
+	return node_vma;
+}
+
+static inline void free_node_vma(struct node_vma *node_vma)
+{
+	kmem_cache_free(node_vma_cache, node_vma);
+}
+
+
+static inline struct vma_slot *alloc_vma_slot(void)
+{
+	struct vma_slot *slot;
+
+	/*
+	 * In case ksm is not initialized by now.
+	 * Oops, we need to consider the call site of uksm_init() in the future.
+	 */
+	if (!vma_slot_cache)
+		return NULL;
+
+	slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL);
+	if (slot) {
+		INIT_LIST_HEAD(&slot->slot_list);
+		INIT_LIST_HEAD(&slot->dedup_list);
+		slot->flags |= UKSM_SLOT_NEED_RERAND;
+	}
+	return slot;
+}
+
+static inline void free_vma_slot(struct vma_slot *vma_slot)
+{
+	kmem_cache_free(vma_slot_cache, vma_slot);
+}
+
+
+
+static inline struct rmap_item *alloc_rmap_item(void)
+{
+	struct rmap_item *rmap_item;
+
+	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	if (rmap_item) {
+		/* BUG if the lowest bit is not clear; it is reserved for flag use */
+		BUG_ON(is_addr(rmap_item));
+	}
+	return rmap_item;
+}
+
+static inline void free_rmap_item(struct rmap_item *rmap_item)
+{
+	rmap_item->slot = NULL;	/* debug safety */
+	kmem_cache_free(rmap_item_cache, rmap_item);
+}
+
+static inline struct stable_node *alloc_stable_node(void)
+{
+	struct stable_node *node;
+	node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL | GFP_ATOMIC);
+	if (!node)
+		return NULL;
+
+	INIT_HLIST_HEAD(&node->hlist);
+	list_add(&node->all_list, &stable_node_list);
+	return node;
+}
+
+static inline void free_stable_node(struct stable_node *stable_node)
+{
+	list_del(&stable_node->all_list);
+	kmem_cache_free(stable_node_cache, stable_node);
+}
+
+static inline struct tree_node *alloc_tree_node(struct list_head *list)
+{
+	struct tree_node *node;
+	node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL | GFP_ATOMIC);
+	if (!node)
+		return NULL;
+
+	list_add(&node->all_list, list);
+	return node;
+}
+
+static inline void free_tree_node(struct tree_node *node)
+{
+	list_del(&node->all_list);
+	kmem_cache_free(tree_node_cache, node);
+}
+
+static void uksm_drop_anon_vma(struct rmap_item *rmap_item)
+{
+	struct anon_vma *anon_vma = rmap_item->anon_vma;
+
+	put_anon_vma(anon_vma);
+}
+
+
+/**
+ * Remove a stable node from the stable_tree; may unlink it from its tree_node
+ * and may remove its parent tree_node if no other stable node is pending.
+ *
+ * @stable_node		The node to be removed
+ * @unlink_rb		Will this node be unlinked from the rbtree?
+ * @remove_tree_node	Will its tree_node be removed if empty?
+ */
+static void remove_node_from_stable_tree(struct stable_node *stable_node,
+					 int unlink_rb,  int remove_tree_node)
+{
+	struct node_vma *node_vma;
+	struct rmap_item *rmap_item;
+	struct hlist_node *n;
+
+	if (!hlist_empty(&stable_node->hlist)) {
+		hlist_for_each_entry_safe(node_vma, n,
+					  &stable_node->hlist, hlist) {
+			hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
+				uksm_pages_sharing--;
+
+				uksm_drop_anon_vma(rmap_item);
+				rmap_item->address &= PAGE_MASK;
+			}
+			free_node_vma(node_vma);
+			cond_resched();
+		}
+
+		/* the last one is counted as shared */
+		uksm_pages_shared--;
+		uksm_pages_sharing++;
+	}
+
+	if (stable_node->tree_node && unlink_rb) {
+		rb_erase(&stable_node->node,
+			 &stable_node->tree_node->sub_root);
+
+		if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) &&
+		    remove_tree_node) {
+			rb_erase(&stable_node->tree_node->node,
+				 root_stable_treep);
+			free_tree_node(stable_node->tree_node);
+		} else {
+			stable_node->tree_node->count--;
+		}
+	}
+
+	free_stable_node(stable_node);
+}
+
+
+/*
+ * get_uksm_page: checks if the page indicated by the stable node
+ * is still its ksm page, despite having held no reference to it.
+ * In which case we can trust the content of the page, and it
+ * returns the gotten page; but if the page has now been zapped,
+ * remove the stale node from the stable tree and return NULL.
+ *
+ * You would expect the stable_node to hold a reference to the ksm page.
+ * But if it increments the page's count, swapping out has to wait for
+ * ksmd to come around again before it can free the page, which may take
+ * seconds or even minutes: much too unresponsive.  So instead we use a
+ * "keyhole reference": access to the ksm page from the stable node peeps
+ * out through its keyhole to see if that page still holds the right key,
+ * pointing back to this stable node.  This relies on freeing a PageAnon
+ * page to reset its page->mapping to NULL, and relies on no other use of
+ * a page to put something that might look like our key in page->mapping.
+ *
+ * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
+ * but this is different - made simpler by uksm_thread_mutex being held, but
+ * interesting for assuming that no other use of the struct page could ever
+ * put our expected_mapping into page->mapping (or a field of the union which
+ * coincides with page->mapping).  The RCU calls are not for KSM at all, but
+ * to keep the page_count protocol described with page_cache_get_speculative.
+ *
+ * Note: it is possible that get_uksm_page() will return NULL one moment,
+ * then page the next, if the page is in between page_freeze_refs() and
+ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
+ * is on its way to being freed; but it is an anomaly to bear in mind.
+ *
+ * @unlink_rb: 		whether the removal of this node should first unlink it
+ * from its rbtree. stable_node_reinsert will prevent this when restructuring
+ * the node from its old tree.
+ *
+ * @remove_tree_node:	if this is the last one in its tree_node, should the
+ * tree_node be freed? If we are inserting a stable node, this tree_node may
+ * be reused, so don't free it.
+ */
+static struct page *get_uksm_page(struct stable_node *stable_node,
+				 int unlink_rb, int remove_tree_node)
+{
+	struct page *page;
+	void *expected_mapping;
+
+	page = pfn_to_page(stable_node->kpfn);
+	expected_mapping = (void *)stable_node +
+				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+	rcu_read_lock();
+	if (page->mapping != expected_mapping)
+		goto stale;
+	if (!get_page_unless_zero(page))
+		goto stale;
+	if (page->mapping != expected_mapping) {
+		put_page(page);
+		goto stale;
+	}
+	rcu_read_unlock();
+	return page;
+stale:
+	rcu_read_unlock();
+	remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node);
+
+	return NULL;
+}
+
+/*
+ * Removing rmap_item from stable or unstable tree.
+ * This function will clean the information from the stable/unstable tree.
+ */
+static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
+{
+	if (rmap_item->address & STABLE_FLAG) {
+		struct stable_node *stable_node;
+		struct node_vma *node_vma;
+		struct page *page;
+
+		node_vma = rmap_item->head;
+		stable_node = node_vma->head;
+		page = get_uksm_page(stable_node, 1, 1);
+		if (!page)
+			goto out;
+
+		/*
+		 * page lock is needed because it's racing with
+		 * try_to_unmap_ksm(), etc.
+		 */
+		lock_page(page);
+		hlist_del(&rmap_item->hlist);
+
+		if (hlist_empty(&node_vma->rmap_hlist)) {
+			hlist_del(&node_vma->hlist);
+			free_node_vma(node_vma);
+		}
+		unlock_page(page);
+
+		put_page(page);
+		if (hlist_empty(&stable_node->hlist)) {
+			/* do NOT call remove_node_from_stable_tree() here,
+			 * it's possible for a forked rmap_item to not be in
+			 * the stable tree while the in-tree rmap_items were
+			 * deleted.
+			 */
+			uksm_pages_shared--;
+		} else
+			uksm_pages_sharing--;
+
+
+		uksm_drop_anon_vma(rmap_item);
+	} else if (rmap_item->address & UNSTABLE_FLAG) {
+		if (rmap_item->hash_round == uksm_hash_round) {
+
+			rb_erase(&rmap_item->node,
+				 &rmap_item->tree_node->sub_root);
+			if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) {
+				rb_erase(&rmap_item->tree_node->node,
+					 &root_unstable_tree);
+
+				free_tree_node(rmap_item->tree_node);
+			} else
+				rmap_item->tree_node->count--;
+		}
+		uksm_pages_unshared--;
+	}
+
+	rmap_item->address &= PAGE_MASK;
+	rmap_item->hash_max = 0;
+
+out:
+	cond_resched();		/* we're called from many long loops */
+}
+
+static inline int slot_in_uksm(struct vma_slot *slot)
+{
+	return list_empty(&slot->slot_list);
+}
+
+/*
+ * Test if the mm is exiting
+ */
+static inline bool uksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+static inline unsigned long vma_pool_size(struct vma_slot *slot)
+{
+	return round_up(sizeof(struct rmap_list_entry) * slot->pages,
+			PAGE_SIZE) >> PAGE_SHIFT;
+}
+
+#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta))
+
+/* must be done with sem locked */
+static int slot_pool_alloc(struct vma_slot *slot)
+{
+	unsigned long pool_size;
+
+	if (slot->rmap_list_pool)
+		return 0;
+
+	pool_size = vma_pool_size(slot);
+	slot->rmap_list_pool = kzalloc(sizeof(struct page *) *
+				       pool_size, GFP_KERNEL);
+	if (!slot->rmap_list_pool)
+		return -ENOMEM;
+
+	slot->pool_counts = kzalloc(sizeof(unsigned int) * pool_size,
+				    GFP_KERNEL);
+	if (!slot->pool_counts) {
+		kfree(slot->rmap_list_pool);
+		return -ENOMEM;
+	}
+
+	slot->pool_size = pool_size;
+	BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages));
+	slot->flags |= UKSM_SLOT_IN_UKSM;
+	uksm_pages_total += slot->pages;
+
+	return 0;
+}
+
+/*
+ * Called after vma is unlinked from its mm
+ */
+void uksm_remove_vma(struct vm_area_struct *vma)
+{
+	struct vma_slot *slot;
+
+	if (!vma->uksm_vma_slot)
+		return;
+
+	spin_lock(&vma_slot_list_lock);
+	slot = vma->uksm_vma_slot;
+	if (!slot)
+		goto out;
+
+	if (slot_in_uksm(slot)) {
+		/**
+		 * This slot has been added by ksmd, so move to the del list
+		 * waiting for ksmd to free it.
+		 */
+		list_add_tail(&slot->slot_list, &vma_slot_del);
+	} else {
+		/**
+		 * It's still on the new list. It's OK to free the slot directly.
+		 */
+		list_del(&slot->slot_list);
+		free_vma_slot(slot);
+	}
+out:
+	vma->uksm_vma_slot = NULL;
+	spin_unlock(&vma_slot_list_lock);
+}
+
+/**
+ * Need to do two things:
+ * 1. check if slot was moved to del list
+ * 2. make sure the mmap_sem is manipulated under a valid vma.
+ *
+ * My concern here is that in some cases, this may cause
+ * vma_slot_list_lock() waiters to be serialized further by some
+ * sem->wait_lock; can this really be expensive?
+ *
+ * @return
+ * 0: if successfully locked mmap_sem
+ * -ENOENT: this slot was moved to del list
+ * -EBUSY: vma lock failed
+ */
+static int try_down_read_slot_mmap_sem(struct vma_slot *slot)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct rw_semaphore *sem;
+
+	spin_lock(&vma_slot_list_lock);
+
+	/* the slot_list was removed and re-inited from the new list when it
+	 * entered uksm_list. If it's not empty now, it must have been moved
+	 * to the del list.
+	 */
+	if (!slot_in_uksm(slot)) {
+		spin_unlock(&vma_slot_list_lock);
+		return -ENOENT;
+	}
+
+	BUG_ON(slot->pages != vma_pages(slot->vma));
+	/* Ok, vma still valid */
+	vma = slot->vma;
+	mm = vma->vm_mm;
+	sem = &mm->mmap_sem;
+
+	if (uksm_test_exit(mm)) {
+		spin_unlock(&vma_slot_list_lock);
+		return -ENOENT;
+	}
+
+	if (down_read_trylock(sem)) {
+		spin_unlock(&vma_slot_list_lock);
+		if (slot_pool_alloc(slot)) {
+			uksm_remove_vma(vma);
+			up_read(sem);
+			return -ENOENT;
+		}
+		return 0;
+	}
+
+	spin_unlock(&vma_slot_list_lock);
+	return -EBUSY;
+}
+
+static inline unsigned long
+vma_page_address(struct page *page, struct vm_area_struct *vma)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
+		/* page should be within @vma mapping range */
+		return -EFAULT;
+	}
+	return address;
+}
+
+
+/* return 0 on success with the item's mmap_sem locked */
+static inline int get_mergeable_page_lock_mmap(struct rmap_item *item)
+{
+	struct mm_struct *mm;
+	struct vma_slot *slot = item->slot;
+	int err = -EINVAL;
+
+	struct page *page;
+
+	/*
+	 * try_down_read_slot_mmap_sem() returns non-zero if the slot
+	 * has been removed by uksm_remove_vma().
+	 */
+	if (try_down_read_slot_mmap_sem(slot))
+		return -EBUSY;
+
+	mm = slot->vma->vm_mm;
+
+	if (uksm_test_exit(mm))
+		goto failout_up;
+
+	page = item->page;
+	rcu_read_lock();
+	if (!get_page_unless_zero(page)) {
+		rcu_read_unlock();
+		goto failout_up;
+	}
+
+	/* No need to consider huge page here. */
+	if (item->slot->vma->anon_vma != page_anon_vma(page) ||
+	    vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) {
+		/*
+		 * TODO:
+		 * should we release this item because of its stale page
+		 * mapping?
+		 */
+		put_page(page);
+		rcu_read_unlock();
+		goto failout_up;
+	}
+	rcu_read_unlock();
+	return 0;
+
+failout_up:
+	up_read(&mm->mmap_sem);
+	return err;
+}
+
+/*
+ * What kind of VMA is considered?
+ */
+static inline int vma_can_enter(struct vm_area_struct *vma)
+{
+	return uksm_flags_can_scan(vma->vm_flags);
+}
+
+/*
+ * Called whenever a fresh new vma is created. A new vma_slot
+ * is created and inserted into a global list. Must be called
+ * after the vma is inserted into its mm.
+ */
+void uksm_vma_add_new(struct vm_area_struct *vma)
+{
+	struct vma_slot *slot;
+
+	if (!vma_can_enter(vma)) {
+		vma->uksm_vma_slot = NULL;
+		return;
+	}
+
+	slot = alloc_vma_slot();
+	if (!slot) {
+		vma->uksm_vma_slot = NULL;
+		return;
+	}
+
+	vma->uksm_vma_slot = slot;
+	vma->vm_flags |= VM_MERGEABLE;
+	slot->vma = vma;
+	slot->mm = vma->vm_mm;
+	slot->ctime_j = jiffies;
+	slot->pages = vma_pages(vma);
+	spin_lock(&vma_slot_list_lock);
+	list_add_tail(&slot->slot_list, &vma_slot_new);
+	spin_unlock(&vma_slot_list_lock);
+}
+
+/*   32/3 < they < 32/2 */
+#define shiftl	8
+#define shiftr	12
+
+#define HASH_FROM_TO(from, to) 				\
+for (index = from; index < to; index++) {		\
+	pos = random_nums[index];			\
+	hash += key[pos];				\
+	hash += (hash << shiftl);			\
+	hash ^= (hash >> shiftr);			\
+}
+
+
+#define HASH_FROM_DOWN_TO(from, to) 			\
+for (index = from - 1; index >= to; index--) {		\
+	hash ^= (hash >> shiftr);			\
+	hash ^= (hash >> (shiftr*2));			\
+	hash -= (hash << shiftl);			\
+	hash += (hash << (shiftl*2));			\
+	pos = random_nums[index];			\
+	hash -= key[pos];				\
+}
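+
+/*
+ * HASH_FROM_DOWN_TO is the exact inverse of HASH_FROM_TO, removing the
+ * sampled words again in reverse order: adding (hash << shiftl) multiplies
+ * by the odd constant (1 + 2^shiftl), which is invertible modulo 2^32, and
+ * with shiftl = 8, shiftr = 12 the subtract/add and double xor-shift steps
+ * above undo that multiply and the "hash ^= hash >> shiftr" mix exactly in
+ * 32-bit arithmetic.  This is what lets delta_hash() below cheaply move a
+ * hash value between two hash strengths without rehashing the whole page.
+ */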
+
+/*
+ * The main random sample hash function.
+ */
+static u32 random_sample_hash(void *addr, u32 hash_strength)
+{
+	u32 hash = 0xdeadbeef;
+	int index, pos, loop = hash_strength;
+	u32 *key = (u32 *)addr;
+
+	if (loop > HASH_STRENGTH_FULL)
+		loop = HASH_STRENGTH_FULL;
+
+	HASH_FROM_TO(0, loop);
+
+	if (hash_strength > HASH_STRENGTH_FULL) {
+		loop = hash_strength - HASH_STRENGTH_FULL;
+		HASH_FROM_TO(0, loop);
+	}
+
+	return hash;
+}
+
+
+/**
+ * Used when the hash strength is adjusted.
+ *
+ * @addr The page's virtual address
+ * @from The original hash strength
+ * @to   The hash strength changed to
+ * @hash The hash value generated with the "from" hash strength
+ *
+ * return the new hash value
+ */
+static u32 delta_hash(void *addr, int from, int to, u32 hash)
+{
+	u32 *key = (u32 *)addr;
+	int index, pos; /* make sure they are int type */
+
+	if (to > from) {
+		if (from >= HASH_STRENGTH_FULL) {
+			from -= HASH_STRENGTH_FULL;
+			to -= HASH_STRENGTH_FULL;
+			HASH_FROM_TO(from, to);
+		} else if (to <= HASH_STRENGTH_FULL) {
+			HASH_FROM_TO(from, to);
+		} else {
+			HASH_FROM_TO(from, HASH_STRENGTH_FULL);
+			HASH_FROM_TO(0, to - HASH_STRENGTH_FULL);
+		}
+	} else {
+		if (from <= HASH_STRENGTH_FULL) {
+			HASH_FROM_DOWN_TO(from, to);
+		} else if (to >= HASH_STRENGTH_FULL) {
+			from -= HASH_STRENGTH_FULL;
+			to -= HASH_STRENGTH_FULL;
+			HASH_FROM_DOWN_TO(from, to);
+		} else {
+			HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0);
+			HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to);
+		}
+	}
+
+	return hash;
+}
+
+/**
+ *
+ * Called when rshash_pos or rshash_neg is about to overflow, or when a scan round
+ * has finished.
+ *
+ * return 0 if no page has been scanned since last call, 1 otherwise.
+ */
+static inline int encode_benefit(void)
+{
+	u64 scanned_delta, pos_delta, neg_delta;
+	unsigned long base = benefit.base;
+
+	scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last;
+
+	if (!scanned_delta)
+		return 0;
+
+	scanned_delta >>= base;
+	pos_delta = rshash_pos >> base;
+	neg_delta = rshash_neg >> base;
+
+	if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) ||
+	    CAN_OVERFLOW_U64(benefit.neg, neg_delta) ||
+	    CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) {
+		benefit.scanned >>= 1;
+		benefit.neg >>= 1;
+		benefit.pos >>= 1;
+		benefit.base++;
+		scanned_delta >>= 1;
+		pos_delta >>= 1;
+		neg_delta >>= 1;
+	}
+
+	benefit.pos += pos_delta;
+	benefit.neg += neg_delta;
+	benefit.scanned += scanned_delta;
+
+	BUG_ON(!benefit.scanned);
+
+	rshash_pos = rshash_neg = 0;
+	uksm_pages_scanned_last = uksm_pages_scanned;
+
+	return 1;
+}
+
+static inline void reset_benefit(void)
+{
+	benefit.pos = 0;
+	benefit.neg = 0;
+	benefit.base = 0;
+	benefit.scanned = 0;
+}
+
+static inline void inc_rshash_pos(unsigned long delta)
+{
+	if (CAN_OVERFLOW_U64(rshash_pos, delta))
+		encode_benefit();
+
+	rshash_pos += delta;
+}
+
+static inline void inc_rshash_neg(unsigned long delta)
+{
+	if (CAN_OVERFLOW_U64(rshash_neg, delta))
+		encode_benefit();
+
+	rshash_neg += delta;
+}
+
+
+static inline u32 page_hash(struct page *page, unsigned long hash_strength,
+			    int cost_accounting)
+{
+	u32 val;
+	unsigned long delta;
+
+	void *addr = kmap_atomic(page);
+
+	val = random_sample_hash(addr, hash_strength);
+	kunmap_atomic(addr);
+
+	if (cost_accounting) {
+		if (HASH_STRENGTH_FULL > hash_strength)
+			delta = HASH_STRENGTH_FULL - hash_strength;
+		else
+			delta = 0;
+
+		inc_rshash_pos(delta);
+	}
+
+	return val;
+}
+
+static int memcmp_pages(struct page *page1, struct page *page2,
+			int cost_accounting)
+{
+	char *addr1, *addr2;
+	int ret;
+
+	addr1 = kmap_atomic(page1);
+	addr2 = kmap_atomic(page2);
+	ret = memcmp(addr1, addr2, PAGE_SIZE);
+	kunmap_atomic(addr2);
+	kunmap_atomic(addr1);
+
+	if (cost_accounting)
+		inc_rshash_neg(memcmp_cost);
+
+	return ret;
+}
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+	return !memcmp_pages(page1, page2, 0);
+}
+
+static inline int is_page_full_zero(struct page *page)
+{
+	char *addr;
+	int ret;
+
+	addr = kmap_atomic(page);
+	ret = is_full_zero(addr, PAGE_SIZE);
+	kunmap_atomic(addr);
+
+	return ret;
+}
+
+static int write_protect_page(struct vm_area_struct *vma, struct page *page,
+			      pte_t *orig_pte, pte_t *old_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	int swapped;
+	int err = -EFAULT;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	BUG_ON(PageTransCompound(page));
+
+	mmun_start = addr;
+	mmun_end   = addr + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
+	if (!ptep)
+		goto out_mn;
+
+	if (old_pte)
+		*old_pte = *ptep;
+
+	if (pte_write(*ptep) || pte_dirty(*ptep)) {
+		pte_t entry;
+
+		swapped = PageSwapCache(page);
+		flush_cache_page(vma, addr, page_to_pfn(page));
+		/*
+		 * Ok this is tricky, when get_user_pages_fast() runs it doesn't
+		 * take any lock, therefore the check that we are going to make
+		 * with the page count against the map count is racy and
+		 * O_DIRECT can happen right after the check.
+		 * So we clear the pte and flush the tlb before the check;
+		 * this assures us that no O_DIRECT can happen after the check
+		 * or in the middle of the check.
+		 */
+		entry = ptep_clear_flush_notify(vma, addr, ptep);
+		/*
+		 * Check that no O_DIRECT or similar I/O is in progress on the
+		 * page
+		 */
+		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
+			set_pte_at(mm, addr, ptep, entry);
+			goto out_unlock;
+		}
+		if (pte_dirty(entry))
+			set_page_dirty(page);
+		entry = pte_mkclean(pte_wrprotect(entry));
+		set_pte_at_notify(mm, addr, ptep, entry);
+	}
+	*orig_pte = *ptep;
+	err = 0;
+
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out:
+	return err;
+}
+
+#define MERGE_ERR_PGERR		1 /* the page is invalid, cannot continue */
+#define MERGE_ERR_COLLI		2 /* there is a collision */
+#define MERGE_ERR_COLLI_MAX	3 /* collision at the max hash strength */
+#define MERGE_ERR_CHANGED	4 /* the page has changed since last hash */
+
+
+/**
+ * replace_page - replace page in vma by new ksm page
+ * @vma:      vma that holds the pte pointing to page
+ * @page:     the page we are replacing by kpage
+ * @kpage:    the ksm page we replace page by
+ * @orig_pte: the original value of the pte
+ *
+ * Returns 0 on success, MERGE_ERR_PGERR on failure.
+ */
+static int replace_page(struct vm_area_struct *vma, struct page *page,
+			struct page *kpage, pte_t orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	pte_t entry;
+
+	unsigned long addr;
+	int err = MERGE_ERR_PGERR;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	BUG_ON(pmd_trans_huge(*pmd));
+	if (!pmd_present(*pmd))
+		goto out;
+
+	mmun_start = addr;
+	mmun_end   = addr + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, orig_pte)) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out_mn;
+	}
+
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush_notify(vma, addr, ptep);
+	entry = mk_pte(kpage, vma->vm_page_prot);
+
+	/* special treatment is needed for zero_page */
+	if ((page_to_pfn(kpage) == uksm_zero_pfn) ||
+				(page_to_pfn(kpage) == zero_pfn))
+		entry = pte_mkspecial(entry);
+	else {
+		get_page(kpage);
+		page_add_anon_rmap(kpage, vma, addr);
+	}
+
+	set_pte_at_notify(mm, addr, ptep, entry);
+
+	page_remove_rmap(page);
+	if (!page_mapped(page))
+		try_to_free_swap(page);
+	put_page(page);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out:
+	return err;
+}
+
+
+/**
+ *  Fully hash a page with HASH_STRENGTH_MAX and return a non-zero hash value.
+ *  A zero hash value at HASH_STRENGTH_MAX is used to indicate that the
+ *  hash_max member has not been calculated yet.
+ *
+ * @page The page to be hashed
+ * @hash_old The hash value calculated with the current hash strength
+ *
+ * return the new hash value calculated at HASH_STRENGTH_MAX
+ */
+static inline u32 page_hash_max(struct page *page, u32 hash_old)
+{
+	u32 hash_max = 0;
+	void *addr;
+
+	addr = kmap_atomic(page);
+	hash_max = delta_hash(addr, hash_strength,
+			      HASH_STRENGTH_MAX, hash_old);
+
+	kunmap_atomic(addr);
+
+	if (!hash_max)
+		hash_max = 1;
+
+	inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
+	return hash_max;
+}
+
+/*
+ * We compare the hash again, to ensure that it is really a hash collision
+ * instead of being caused by a page write.
+ */
+static inline int check_collision(struct rmap_item *rmap_item,
+				  u32 hash)
+{
+	int err;
+	struct page *page = rmap_item->page;
+
+	/* if this rmap_item has already been hash_maxed, then the collision
+	 * must appear in the second-level rbtree search. In this case we check
+	 * if its hash_max value has been changed. Otherwise, the collision
+	 * happened in the first-level rbtree search, so we check against its
+	 * current hash value.
+	 */
+	if (rmap_item->hash_max) {
+		inc_rshash_neg(memcmp_cost);
+		inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
+
+		if (rmap_item->hash_max == page_hash_max(page, hash))
+			err = MERGE_ERR_COLLI;
+		else
+			err = MERGE_ERR_CHANGED;
+	} else {
+		inc_rshash_neg(memcmp_cost + hash_strength);
+
+		if (page_hash(page, hash_strength, 0) == hash)
+			err = MERGE_ERR_COLLI;
+		else
+			err = MERGE_ERR_CHANGED;
+	}
+
+	return err;
+}
+
+static struct page *page_trans_compound_anon(struct page *page)
+{
+	if (PageTransCompound(page)) {
+		struct page *head = compound_head(page);
+		/*
+		 * head may actually be split and freed from under
+		 * us, but it's ok here.
+		 */
+		if (PageAnon(head))
+			return head;
+	}
+	return NULL;
+}
+
+static int page_trans_compound_anon_split(struct page *page)
+{
+	int ret = 0;
+	struct page *transhuge_head = page_trans_compound_anon(page);
+	if (transhuge_head) {
+		/* Get the reference on the head to split it. */
+		if (get_page_unless_zero(transhuge_head)) {
+			/*
+			 * Recheck we got the reference while the head
+			 * was still anonymous.
+			 */
+			if (PageAnon(transhuge_head))
+				ret = split_huge_page(transhuge_head);
+			else
+				/*
+				 * Retry later if split_huge_page run
+				 * Retry later if split_huge_page ran
+				 */
+				ret = 1;
+			put_page(transhuge_head);
+		} else
+			/* Retry later if split_huge_page ran from under us. */
+			ret = 1;
+	}
+	return ret;
+}
+
+/**
+ * Try to merge a rmap_item.page with a kpage in stable node. kpage must
+ * already be a ksm page.
+ *
+ * @return 0 if the pages were merged, -EFAULT otherwise.
+ */
+static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item,
+				      struct page *kpage, u32 hash)
+{
+	struct vm_area_struct *vma = rmap_item->slot->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t orig_pte = __pte(0);
+	int err = MERGE_ERR_PGERR;
+	struct page *page;
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	page = rmap_item->page;
+
+	if (page == kpage) { /* ksm page forked */
+		err = 0;
+		goto out;
+	}
+
+	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+		goto out;
+	BUG_ON(PageTransCompound(page));
+
+	if (!PageAnon(page) || !PageKsm(kpage))
+		goto out;
+
+	/*
+	 * We need the page lock to read a stable PageSwapCache in
+	 * write_protect_page().  We use trylock_page() instead of
+	 * lock_page() because we don't want to wait here - we
+	 * prefer to continue scanning and merging different pages,
+	 * then come back to this page when it is unlocked.
+	 */
+	if (!trylock_page(page))
+		goto out;
+	/*
+	 * If this anonymous page is mapped only here, its pte may need
+	 * to be write-protected.  If it's mapped elsewhere, all of its
+	 * ptes are necessarily already write-protected.  But in either
+	 * case, we need to lock and check page_count is not raised.
+	 */
+	if (write_protect_page(vma, page, &orig_pte, NULL) == 0) {
+		if (pages_identical(page, kpage))
+			err = replace_page(vma, page, kpage, orig_pte);
+		else
+			err = check_collision(rmap_item, hash);
+	}
+
+	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
+		munlock_vma_page(page);
+		if (!PageMlocked(kpage)) {
+			unlock_page(page);
+			lock_page(kpage);
+			mlock_vma_page(kpage);
+			page = kpage;		/* for final unlock */
+		}
+	}
+
+	unlock_page(page);
+out:
+	return err;
+}
+
+
+
+/**
+ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance
+ * to restore a page mapping that has been changed in try_to_merge_two_pages.
+ *
+ * @return 0 on success.
+ */
+static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t orig_pte, pte_t wprt_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	int err = -EFAULT;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		goto out;
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, wprt_pte)) {
+		/* already copied, let it be */
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
+	/*
+	 * Good, still here. While we still hold the ksm page, it cannot
+	 * return to the free page pool, so there is no way a pte was changed
+	 * to another page and then back to this page. And remember that ksm
+	 * pages are not reused in do_wp_page(). So it's safe to restore the
+	 * original pte.
+	 */
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush_notify(vma, addr, ptep);
+	set_pte_at_notify(mm, addr, ptep, orig_pte);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out:
+	return err;
+}
+
+/**
+ * try_to_merge_two_pages() - take two identical pages and prepare
+ * them to be merged into one page (rmap_item->page)
+ *
+ * @return 0 if we successfully merged two identical pages into
+ *         one ksm page. MERGE_ERR_COLLI if it's only a hash collision
+ *         found during the rbtree search. MERGE_ERR_CHANGED if rmap_item
+ *         has been changed since it was hashed. MERGE_ERR_PGERR otherwise.
+ *
+ */
+static int try_to_merge_two_pages(struct rmap_item *rmap_item,
+				  struct rmap_item *tree_rmap_item,
+				  u32 hash)
+{
+	pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0);
+	pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0);
+	struct vm_area_struct *vma1 = rmap_item->slot->vma;
+	struct vm_area_struct *vma2 = tree_rmap_item->slot->vma;
+	struct page *page = rmap_item->page;
+	struct page *tree_page = tree_rmap_item->page;
+	int err = MERGE_ERR_PGERR;
+	struct address_space *saved_mapping;
+
+
+	if (rmap_item->page == tree_rmap_item->page)
+		goto out;
+
+	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+		goto out;
+	BUG_ON(PageTransCompound(page));
+
+	if (PageTransCompound(tree_page) && page_trans_compound_anon_split(tree_page))
+		goto out;
+	BUG_ON(PageTransCompound(tree_page));
+
+	if (!PageAnon(page) || !PageAnon(tree_page))
+		goto out;
+
+	if (!trylock_page(page))
+		goto out;
+
+
+	if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) {
+		unlock_page(page);
+		goto out;
+	}
+
+	/*
+	 * While we hold page lock, upgrade page from
+	 * PageAnon+anon_vma to PageKsm+NULL stable_node:
+	 * stable_tree_insert() will update stable_node.
+	 */
+	saved_mapping = page->mapping;
+	set_page_stable_node(page, NULL);
+	mark_page_accessed(page);
+	unlock_page(page);
+
+	if (!trylock_page(tree_page))
+		goto restore_out;
+
+	if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) {
+		unlock_page(tree_page);
+		goto restore_out;
+	}
+
+	if (pages_identical(page, tree_page)) {
+		err = replace_page(vma2, tree_page, page, wprt_pte2);
+		if (err) {
+			unlock_page(tree_page);
+			goto restore_out;
+		}
+
+		if ((vma2->vm_flags & VM_LOCKED)) {
+			munlock_vma_page(tree_page);
+			if (!PageMlocked(page)) {
+				unlock_page(tree_page);
+				lock_page(page);
+				mlock_vma_page(page);
+				tree_page = page; /* for final unlock */
+			}
+		}
+
+		unlock_page(tree_page);
+
+		goto out; /* success */
+
+	} else {
+		if (tree_rmap_item->hash_max &&
+		    tree_rmap_item->hash_max == rmap_item->hash_max) {
+			err = MERGE_ERR_COLLI_MAX;
+		} else if (page_hash(page, hash_strength, 0) ==
+		    page_hash(tree_page, hash_strength, 0)) {
+			inc_rshash_neg(memcmp_cost + hash_strength * 2);
+			err = MERGE_ERR_COLLI;
+		} else {
+			err = MERGE_ERR_CHANGED;
+		}
+
+		unlock_page(tree_page);
+	}
+
+restore_out:
+	lock_page(page);
+	if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item),
+				  orig_pte1, wprt_pte1))
+		page->mapping = saved_mapping;
+
+	unlock_page(page);
+out:
+	return err;
+}
+
+static inline int hash_cmp(u32 new_val, u32 node_val)
+{
+	if (new_val > node_val)
+		return 1;
+	else if (new_val < node_val)
+		return -1;
+	else
+		return 0;
+}
+
+static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash)
+{
+	u32 hash_max = item->hash_max;
+
+	if (!hash_max) {
+		hash_max = page_hash_max(item->page, hash);
+
+		item->hash_max = hash_max;
+	}
+
+	return hash_max;
+}
+
+
+
+/**
+ * stable_tree_search() - search the stable tree for a page
+ *
+ * @item: 	the rmap_item we are comparing with
+ * @hash: 	the hash value of this item->page already calculated
+ *
+ * @return 	the page we have found, NULL otherwise. The page returned has
+ *         	been gotten.
+ */
+static struct page *stable_tree_search(struct rmap_item *item, u32 hash)
+{
+	struct rb_node *node = root_stable_treep->rb_node;
+	struct tree_node *tree_node;
+	unsigned long hash_max;
+	struct page *page = item->page;
+	struct stable_node *stable_node;
+
+	stable_node = page_stable_node(page);
+	if (stable_node) {
+		/* ksm page forked, that is
+		 * if (PageKsm(page) && !in_stable_tree(rmap_item))
+		 * it's actually gotten once outside.
+		 */
+		get_page(page);
+		return page;
+	}
+
+	while (node) {
+		int cmp;
+
+		tree_node = rb_entry(node, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (!node)
+		return NULL;
+
+	if (tree_node->count == 1) {
+		stable_node = rb_entry(tree_node->sub_root.rb_node,
+				       struct stable_node, node);
+		BUG_ON(!stable_node);
+
+		goto get_page_out;
+	}
+
+	/*
+	 * ok, we have to search the second-level
+	 * subtree; hash the page to full strength.
+	 */
+	node = tree_node->sub_root.rb_node;
+	BUG_ON(!node);
+	hash_max = rmap_item_hash_max(item, hash);
+
+	while (node) {
+		int cmp;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+
+		cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			goto get_page_out;
+	}
+
+	return NULL;
+
+get_page_out:
+	page = get_uksm_page(stable_node, 1, 1);
+	return page;
+}
+
+static int try_merge_rmap_item(struct rmap_item *item,
+			       struct page *kpage,
+			       struct page *tree_page)
+{
+	spinlock_t *ptl;
+	pte_t *ptep;
+	unsigned long addr;
+	struct vm_area_struct *vma = item->slot->vma;
+
+	addr = get_rmap_addr(item);
+	ptep = page_check_address(kpage, vma->vm_mm, addr, &ptl, 0);
+	if (!ptep)
+		return 0;
+
+	if (pte_write(*ptep)) {
+		/* has changed, abort! */
+		pte_unmap_unlock(ptep, ptl);
+		return 0;
+	}
+
+	get_page(tree_page);
+	page_add_anon_rmap(tree_page, vma, addr);
+
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush_notify(vma, addr, ptep);
+	set_pte_at_notify(vma->vm_mm, addr, ptep,
+			  mk_pte(tree_page, vma->vm_page_prot));
+
+	page_remove_rmap(kpage);
+	put_page(kpage);
+
+	pte_unmap_unlock(ptep, ptl);
+
+	return 1;
+}
+
+/**
+ * try_merge_with_stable() - when two rmap_items need to be inserted into the
+ * stable tree and the page was found to be identical to a stable ksm page,
+ * this is the last chance we have to merge them into one.
+ *
+ * @item1:	the rmap_item holding the page which we wanted to insert
+ *       	into the stable tree.
+ * @item2:	the other rmap_item we found during the unstable tree search
+ * @kpage:	the page currently mapped by the two rmap_items
+ * @tree_page: 	the page we found identical in the stable tree node
+ * @success1:	returns whether item1 was successfully merged
+ * @success2:	returns whether item2 was successfully merged
+ */
+static void try_merge_with_stable(struct rmap_item *item1,
+				  struct rmap_item *item2,
+				  struct page **kpage,
+				  struct page *tree_page,
+				  int *success1, int *success2)
+{
+	struct vm_area_struct *vma1 = item1->slot->vma;
+	struct vm_area_struct *vma2 = item2->slot->vma;
+	*success1 = 0;
+	*success2 = 0;
+
+	if (unlikely(*kpage == tree_page)) {
+		/* I don't think this can really happen */
+		printk(KERN_WARNING "UKSM: unexpected condition detected in "
+			"try_merge_with_stable() -- *kpage == tree_page !\n");
+		*success1 = 1;
+		*success2 = 1;
+		return;
+	}
+
+	if (!PageAnon(*kpage) || !PageKsm(*kpage))
+		goto failed;
+
+	if (!trylock_page(tree_page))
+		goto failed;
+
+	/* If the old page is still ksm, still pointed
+	 * to in the right place, and still write protected,
+	 * we are confident it has not changed; no need to
+	 * memcmp anymore.
+	 * Beware, we cannot take nested pte locks;
+	 * deadlock risk.
+	 */
+	if (!try_merge_rmap_item(item1, *kpage, tree_page))
+		goto unlock_failed;
+
+	/* ok, now vma2; remember that pte1 has already been set */
+	if (!try_merge_rmap_item(item2, *kpage, tree_page))
+		goto success_1;
+
+	*success2 = 1;
+success_1:
+	*success1 = 1;
+
+
+	if ((*success1 && vma1->vm_flags & VM_LOCKED) ||
+	    (*success2 && vma2->vm_flags & VM_LOCKED)) {
+		munlock_vma_page(*kpage);
+		if (!PageMlocked(tree_page))
+			mlock_vma_page(tree_page);
+	}
+
+	/*
+	 * We do not need the old page any more in the caller, so we can drop
+	 * the lock now.
+	 */
+	unlock_page(*kpage);
+	*kpage = tree_page; /* Get unlocked outside. */
+	return;
+
+unlock_failed:
+	unlock_page(tree_page);
+failed:
+	return;
+}
+
+static inline void stable_node_hash_max(struct stable_node *node,
+					 struct page *page, u32 hash)
+{
+	u32 hash_max = node->hash_max;
+
+	if (!hash_max) {
+		hash_max = page_hash_max(page, hash);
+		node->hash_max = hash_max;
+	}
+}
+
+static inline
+struct stable_node *new_stable_node(struct tree_node *tree_node,
+				    struct page *kpage, u32 hash_max)
+{
+	struct stable_node *new_stable_node;
+
+	new_stable_node = alloc_stable_node();
+	if (!new_stable_node)
+		return NULL;
+
+	new_stable_node->kpfn = page_to_pfn(kpage);
+	new_stable_node->hash_max = hash_max;
+	new_stable_node->tree_node = tree_node;
+	set_page_stable_node(kpage, new_stable_node);
+
+	return new_stable_node;
+}
+
+static inline
+struct stable_node *first_level_insert(struct tree_node *tree_node,
+				       struct rmap_item *rmap_item,
+				       struct rmap_item *tree_rmap_item,
+				       struct page **kpage, u32 hash,
+				       int *success1, int *success2)
+{
+	int cmp;
+	struct page *tree_page;
+	u32 hash_max = 0;
+	struct stable_node *stable_node, *new_snode;
+	struct rb_node *parent = NULL, **new;
+
+	/* this tree node contains no sub-tree yet */
+	stable_node = rb_entry(tree_node->sub_root.rb_node,
+			       struct stable_node, node);
+
+	tree_page = get_uksm_page(stable_node, 1, 0);
+	if (tree_page) {
+		cmp = memcmp_pages(*kpage, tree_page, 1);
+		if (!cmp) {
+			try_merge_with_stable(rmap_item, tree_rmap_item, kpage,
+					      tree_page, success1, success2);
+			put_page(tree_page);
+			if (!*success1 && !*success2)
+				goto failed;
+
+			return stable_node;
+
+		} else {
+			/*
+			 * Collision in the first level; try to create a
+			 * subtree. A new node needs to be created.
+			 */
+			put_page(tree_page);
+
+			stable_node_hash_max(stable_node, tree_page,
+					     tree_node->hash);
+			hash_max = rmap_item_hash_max(rmap_item, hash);
+			cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+			parent = &stable_node->node;
+			if (cmp < 0) {
+				new = &parent->rb_left;
+			} else if (cmp > 0) {
+				new = &parent->rb_right;
+			} else {
+				goto failed;
+			}
+		}
+
+	} else {
+		/* The only stable_node was deleted; we reuse its tree_node.
+		 */
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+	new_snode = new_stable_node(tree_node, *kpage, hash_max);
+	if (!new_snode)
+		goto failed;
+
+	rb_link_node(&new_snode->node, parent, new);
+	rb_insert_color(&new_snode->node, &tree_node->sub_root);
+	tree_node->count++;
+	*success1 = *success2 = 1;
+
+	return new_snode;
+
+failed:
+	return NULL;
+}
+
+static inline
+struct stable_node *stable_subtree_insert(struct tree_node *tree_node,
+					  struct rmap_item *rmap_item,
+					  struct rmap_item *tree_rmap_item,
+					  struct page **kpage, u32 hash,
+					  int *success1, int *success2)
+{
+	struct page *tree_page;
+	u32 hash_max;
+	struct stable_node *stable_node, *new_snode;
+	struct rb_node *parent, **new;
+
+research:
+	parent = NULL;
+	new = &tree_node->sub_root.rb_node;
+	BUG_ON(!*new);
+	hash_max = rmap_item_hash_max(rmap_item, hash);
+	while (*new) {
+		int cmp;
+
+		stable_node = rb_entry(*new, struct stable_node, node);
+
+		cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else {
+			tree_page = get_uksm_page(stable_node, 1, 0);
+			if (tree_page) {
+				cmp = memcmp_pages(*kpage, tree_page, 1);
+				if (!cmp) {
+					try_merge_with_stable(rmap_item,
+						tree_rmap_item, kpage,
+						tree_page, success1, success2);
+
+					put_page(tree_page);
+					if (!*success1 && !*success2)
+						goto failed;
+					/*
+					 * successfully merged with a stable
+					 * node
+					 */
+					return stable_node;
+				} else {
+					put_page(tree_page);
+					goto failed;
+				}
+			} else {
+				/*
+				 * The stable node may have been
+				 * deleted and the subtree may have
+				 * been restructured; we cannot
+				 * continue, so re-search it.
+				 */
+				if (tree_node->count) {
+					goto research;
+				} else {
+					/* reuse the tree node*/
+					parent = NULL;
+					new = &tree_node->sub_root.rb_node;
+				}
+			}
+		}
+	}
+
+	new_snode = new_stable_node(tree_node, *kpage, hash_max);
+	if (!new_snode)
+		goto failed;
+
+	rb_link_node(&new_snode->node, parent, new);
+	rb_insert_color(&new_snode->node, &tree_node->sub_root);
+	tree_node->count++;
+	*success1 = *success2 = 1;
+
+	return new_snode;
+
+failed:
+	return NULL;
+}
+
+
+/**
+ * stable_tree_insert() - try to insert a page merged in the unstable tree
+ * into the stable tree
+ *
+ * @kpage:		the page to be inserted
+ * @hash:		the current hash of this page
+ * @rmap_item:		the rmap_item being scanned
+ * @tree_rmap_item:	the rmap_item found in the unstable tree
+ * @success1:		returns whether rmap_item was merged
+ * @success2:		returns whether tree_rmap_item was merged
+ *
+ * @return 		the stable_node in the stable tree if at least one
+ *      		rmap_item was inserted into the stable tree, NULL
+ *      		otherwise.
+ */
+static struct stable_node *
+stable_tree_insert(struct page **kpage, u32 hash,
+		   struct rmap_item *rmap_item,
+		   struct rmap_item *tree_rmap_item,
+		   int *success1, int *success2)
+{
+	struct rb_node **new = &root_stable_treep->rb_node;
+	struct rb_node *parent = NULL;
+	struct stable_node *stable_node;
+	struct tree_node *tree_node;
+	u32 hash_max = 0;
+
+	*success1 = *success2 = 0;
+
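+	/* first level: look up the tree_node keyed by the page hash */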
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		if (tree_node->count == 1) {
+			stable_node = first_level_insert(tree_node, rmap_item,
+						tree_rmap_item, kpage,
+						hash, success1, success2);
+		} else {
+			stable_node = stable_subtree_insert(tree_node,
+					rmap_item, tree_rmap_item, kpage,
+					hash, success1, success2);
+		}
+	} else {
+
+		/* no tree node found */
+		tree_node = alloc_tree_node(stable_tree_node_listp);
+		if (!tree_node) {
+			stable_node = NULL;
+			goto out;
+		}
+
+		stable_node = new_stable_node(tree_node, *kpage, hash_max);
+		if (!stable_node) {
+			free_tree_node(tree_node);
+			goto out;
+		}
+
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, root_stable_treep);
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+
+		rb_link_node(&stable_node->node, parent, new);
+		rb_insert_color(&stable_node->node, &tree_node->sub_root);
+		tree_node->count++;
+		*success1 = *success2 = 1;
+	}
+
+out:
+	return stable_node;
+}
+
+
+/**
+ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem
+ *
+ * @return 	0 on success, -EBUSY if unable to lock the mmap_sem,
+ *         	-EINVAL if the page mapping has been changed.
+ */
+static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item)
+{
+	int err;
+
+	err = get_mergeable_page_lock_mmap(tree_rmap_item);
+
+	if (err == -EINVAL) {
+		/* its page mapping has changed, remove it from the tree */
+		remove_rmap_item_from_tree(tree_rmap_item);
+	}
+
+	/* On success, the page has been pinned and mmap_sem is read-locked. */
+	return err;
+}
+
+
+/**
+ * unstable_tree_search_insert() - search the unstable tree for an rmap_item
+ * with the same hash value; get its page and trylock its mmap_sem
+ */
+static inline
+struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
+					      u32 hash)
+
+{
+	struct rb_node **new = &root_unstable_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct tree_node *tree_node;
+	u32 hash_max;
+	struct rmap_item *tree_rmap_item;
+
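+	/* first level: find the tree_node for this hash */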
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		/* got the tree_node */
+		if (tree_node->count == 1) {
+			tree_rmap_item = rb_entry(tree_node->sub_root.rb_node,
+						  struct rmap_item, node);
+			BUG_ON(!tree_rmap_item);
+
+			goto get_page_out;
+		}
+
+		/* well, search the collision subtree */
+		new = &tree_node->sub_root.rb_node;
+		BUG_ON(!*new);
+		hash_max = rmap_item_hash_max(rmap_item, hash);
+
+		while (*new) {
+			int cmp;
+
+			tree_rmap_item = rb_entry(*new, struct rmap_item,
+						  node);
+
+			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
+			parent = *new;
+			if (cmp < 0)
+				new = &parent->rb_left;
+			else if (cmp > 0)
+				new = &parent->rb_right;
+			else
+				goto get_page_out;
+		}
+	} else {
+		/* alloc a new tree_node */
+		tree_node = alloc_tree_node(&unstable_tree_node_list);
+		if (!tree_node)
+			return NULL;
+
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, &root_unstable_tree);
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+	/* not found even in the sub-tree */
+	rmap_item->tree_node = tree_node;
+	rmap_item->address |= UNSTABLE_FLAG;
+	rmap_item->hash_round = uksm_hash_round;
+	rb_link_node(&rmap_item->node, parent, new);
+	rb_insert_color(&rmap_item->node, &tree_node->sub_root);
+
+	uksm_pages_unshared++;
+	return NULL;
+
+get_page_out:
+	if (tree_rmap_item->page == rmap_item->page)
+		return NULL;
+
+	if (get_tree_rmap_item_page(tree_rmap_item))
+		return NULL;
+
+	return tree_rmap_item;
+}
+
+static void hold_anon_vma(struct rmap_item *rmap_item,
+			  struct anon_vma *anon_vma)
+{
+	rmap_item->anon_vma = anon_vma;
+	get_anon_vma(anon_vma);
+}
+
+
+/**
+ * stable_tree_append() - append an rmap_item to a stable node. Deduplication
+ * ratio statistics are also updated here.
+ *
+ */
+static void stable_tree_append(struct rmap_item *rmap_item,
+			       struct stable_node *stable_node, int logdedup)
+{
+	struct node_vma *node_vma = NULL, *new_node_vma, *node_vma_cont = NULL;
+	unsigned long key = (unsigned long)rmap_item->slot;
+	unsigned long factor = rmap_item->slot->rung->step;
+
+	BUG_ON(!stable_node);
+	rmap_item->address |= STABLE_FLAG;
+
+	if (hlist_empty(&stable_node->hlist)) {
+		uksm_pages_shared++;
+		goto node_vma_new;
+	} else {
+		uksm_pages_sharing++;
+	}
+
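+	/*
+	 * The node_vma list is kept sorted by slot key; stop at the
+	 * insertion point.
+	 */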
+	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
+		if (node_vma->key >= key)
+			break;
+
+		if (logdedup) {
+			node_vma->slot->pages_bemerged += factor;
+			if (list_empty(&node_vma->slot->dedup_list))
+				list_add(&node_vma->slot->dedup_list,
+					 &vma_slot_dedup);
+		}
+	}
+
+	if (node_vma) {
+		if (node_vma->key == key) {
+			node_vma_cont = hlist_entry_safe(node_vma->hlist.next, struct node_vma, hlist);
+			goto node_vma_ok;
+		} else if (node_vma->key > key) {
+			node_vma_cont = node_vma;
+		}
+	}
+
+node_vma_new:
+	/* no node_vma for this vma yet, allocate a new one */
+	new_node_vma = alloc_node_vma();
+	BUG_ON(!new_node_vma);
+	new_node_vma->head = stable_node;
+	new_node_vma->slot = rmap_item->slot;
+
+	if (!node_vma) {
+		hlist_add_head(&new_node_vma->hlist, &stable_node->hlist);
+	} else if (node_vma->key != key) {
+		if (node_vma->key < key)
+			hlist_add_behind(&new_node_vma->hlist, &node_vma->hlist);
+		else {
+			hlist_add_before(&new_node_vma->hlist,
+					 &node_vma->hlist);
+		}
+
+	}
+	node_vma = new_node_vma;
+
+node_vma_ok: /* ok, ready to add to the list */
+	rmap_item->head = node_vma;
+	hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist);
+	hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma);
+	if (logdedup) {
+		rmap_item->slot->pages_merged++;
+		if (node_vma_cont) {
+			node_vma = node_vma_cont;
+			hlist_for_each_entry_continue(node_vma, hlist) {
+				node_vma->slot->pages_bemerged += factor;
+				if (list_empty(&node_vma->slot->dedup_list))
+					list_add(&node_vma->slot->dedup_list,
+						 &vma_slot_dedup);
+			}
+		}
+	}
+}
+
+/*
+ * We use break_ksm to break COW on a ksm page: it's a stripped down
+ *
+ *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
+ *		put_page(page);
+ *
+ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
+ * in case the application has unmapped and remapped mm,addr meanwhile.
+ * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
+ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
+ */
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct page *page;
+	int ret = 0;
+
+	do {
+		cond_resched();
+		page = follow_page(vma, addr, FOLL_GET);
+		if (IS_ERR_OR_NULL(page))
+			break;
+		if (PageKsm(page)) {
+			ret = handle_mm_fault(vma->vm_mm, vma, addr,
+					      FAULT_FLAG_WRITE);
+		} else
+			ret = VM_FAULT_WRITE;
+		put_page(page);
+	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
+	/*
+	 * We must loop because handle_mm_fault() may back out if there's
+	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
+	 *
+	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
+	 * COW has been broken, even if the vma does not permit VM_WRITE;
+	 * but note that a concurrent fault might break PageKsm for us.
+	 *
+	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
+	 * backing file, which also invalidates anonymous pages: that's
+	 * okay, that truncation will have unmapped the PageKsm for us.
+	 *
+	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
+	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
+	 * current task has TIF_MEMDIE set, and will be OOM killed on return
+	 * to user; and ksmd, having no mm, would never be chosen for that.
+	 *
+	 * But if the mm is in a limited mem_cgroup, then the fault may fail
+	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
+	 * even ksmd can fail in this way - though it's usually breaking ksm
+	 * just to undo a merge it made a moment before, so unlikely to oom.
+	 *
+	 * That's a pity: we might therefore have more kernel pages allocated
+	 * than we're counting as nodes in the stable tree; but uksm_do_scan
+	 * will retry to break_cow on each pass, so should recover the page
+	 * in due course.  The important thing is to not let VM_MERGEABLE
+	 * be cleared while any such pages might remain in the area.
+	 */
+	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
+}
+
+static void break_cow(struct rmap_item *rmap_item)
+{
+	struct vm_area_struct *vma = rmap_item->slot->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr = get_rmap_addr(rmap_item);
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	break_ksm(vma, addr);
+out:
+	return;
+}
+
+/*
+ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
+ * than check every pte of a given vma, the locking doesn't quite work for
+ * that - an rmap_item is assigned to the stable tree after inserting ksm
+ * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
+ * rmap_items from parent to child at fork time (so as not to waste time
+ * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
+ */
+inline int unmerge_uksm_pages(struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	int err = 0;
+
+	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (uksm_test_exit(vma->vm_mm))
+			break;
+		if (signal_pending(current))
+			err = -ERESTARTSYS;
+		else
+			err = break_ksm(vma, addr);
+	}
+	return err;
+}
+
+static inline void inc_uksm_pages_scanned(void)
+{
+	u64 delta;
+
+
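+	/*
+	 * When the raw counter is about to wrap, fold it into
+	 * pages_scanned_stored, scaled down by 2^pages_scanned_base, so the
+	 * total number of scanned pages is preserved.
+	 */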
+	if (uksm_pages_scanned == U64_MAX) {
+		encode_benefit();
+
+		delta = uksm_pages_scanned >> pages_scanned_base;
+
+		if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) {
+			pages_scanned_stored >>= 1;
+			delta >>= 1;
+			pages_scanned_base++;
+		}
+
+		pages_scanned_stored += delta;
+
+		uksm_pages_scanned = uksm_pages_scanned_last = 0;
+	}
+
+	uksm_pages_scanned++;
+}
+
+static inline int find_zero_page_hash(int strength, u32 hash)
+{
+	return (zero_hash_table[strength] == hash);
+}
+
+static
+int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page)
+{
+	struct page *zero_page = empty_uksm_zero_page;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t orig_pte = __pte(0);
+	int err = -EFAULT;
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+		goto out;
+	BUG_ON(PageTransCompound(page));
+
+	if (!PageAnon(page))
+		goto out;
+
+	if (!trylock_page(page))
+		goto out;
+
+	if (write_protect_page(vma, page, &orig_pte, 0) == 0) {
+		if (is_page_full_zero(page))
+			err = replace_page(vma, page, zero_page, orig_pte);
+	}
+
+	unlock_page(page);
+out:
+	return err;
+}
+
+/*
+ * cmp_and_merge_page() - first see if page can be merged into the stable
+ * tree; if not, compare hash to previous and if it's the same, see if page
+ * can be inserted into the unstable tree, or merged with a page already there
+ * and both transferred to the stable tree.
+ *
+ * @page: the page that we are searching identical page to.
+ * @rmap_item: the reverse mapping into the virtual address of this page
+ */
+static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash)
+{
+	struct rmap_item *tree_rmap_item;
+	struct page *page;
+	struct page *kpage = NULL;
+	u32 hash_max;
+	int err;
+	unsigned int success1, success2;
+	struct stable_node *snode;
+	int cmp;
+	struct rb_node *parent = NULL, **new;
+
+	remove_rmap_item_from_tree(rmap_item);
+	page = rmap_item->page;
+
+	/* We first start with searching the page inside the stable tree */
+	kpage = stable_tree_search(rmap_item, hash);
+	if (kpage) {
+		err = try_to_merge_with_uksm_page(rmap_item, kpage,
+						 hash);
+		if (!err) {
+			/*
+			 * The page was successfully merged, add
+			 * its rmap_item to the stable tree.
+			 * page lock is needed because it's
+			 * racing with try_to_unmap_ksm(), etc.
+			 */
+			lock_page(kpage);
+			snode = page_stable_node(kpage);
+			stable_tree_append(rmap_item, snode, 1);
+			unlock_page(kpage);
+			put_page(kpage);
+			return; /* success */
+		}
+		put_page(kpage);
+
+		/*
+		 * If it's a collision and it has already been searched in the
+		 * sub-rbtree (hash_max != 0), abort: even if it were merged
+		 * in the unstable tree, the collision would tend to happen
+		 * again.
+		 */
+		if (err == MERGE_ERR_COLLI && rmap_item->hash_max)
+			return;
+	}
+
+	tree_rmap_item =
+		unstable_tree_search_insert(rmap_item, hash);
+	if (tree_rmap_item) {
+		err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash);
+		/*
+		 * As soon as we merge this page, we want to remove the
+		 * rmap_item of the page we have merged with from the unstable
+		 * tree, and insert it instead as new node in the stable tree.
+		 */
+		if (!err) {
+			kpage = page;
+			remove_rmap_item_from_tree(tree_rmap_item);
+			lock_page(kpage);
+			snode = stable_tree_insert(&kpage, hash,
+						   rmap_item, tree_rmap_item,
+						   &success1, &success2);
+
+			/*
+			 * Do not log dedup for tree item, it's not counted as
+			 * scanned in this round.
+			 */
+			if (success2)
+				stable_tree_append(tree_rmap_item, snode, 0);
+
+			/*
+			 * The order of these two stable_tree_append() calls
+			 * matters: rmap_item is the one being scanned in this
+			 * round.
+			 */
+			if (success1)
+				stable_tree_append(rmap_item, snode, 1);
+
+			/*
+			 * The original kpage may be unlocked inside
+			 * stable_tree_insert() already. This page
+			 * should be unlocked before doing
+			 * break_cow().
+			 */
+			unlock_page(kpage);
+
+			if (!success1)
+				break_cow(rmap_item);
+
+			if (!success2)
+				break_cow(tree_rmap_item);
+
+		} else if (err == MERGE_ERR_COLLI) {
+			BUG_ON(tree_rmap_item->tree_node->count > 1);
+
+			rmap_item_hash_max(tree_rmap_item,
+					   tree_rmap_item->tree_node->hash);
+
+			hash_max = rmap_item_hash_max(rmap_item, hash);
+			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
+			parent = &tree_rmap_item->node;
+			if (cmp < 0)
+				new = &parent->rb_left;
+			else if (cmp > 0)
+				new = &parent->rb_right;
+			else
+				goto put_up_out;
+
+			rmap_item->tree_node = tree_rmap_item->tree_node;
+			rmap_item->address |= UNSTABLE_FLAG;
+			rmap_item->hash_round = uksm_hash_round;
+			rb_link_node(&rmap_item->node, parent, new);
+			rb_insert_color(&rmap_item->node,
+					&tree_rmap_item->tree_node->sub_root);
+			rmap_item->tree_node->count++;
+		} else {
+			/*
+			 * Either one of the pages has changed or they collide
+			 * at the max hash; treat them as ill items.
+			 */
+			remove_rmap_item_from_tree(tree_rmap_item);
+		}
+put_up_out:
+		put_page(tree_rmap_item->page);
+		up_read(&tree_rmap_item->slot->vma->vm_mm->mmap_sem);
+	}
+}
+
+
+
+
+static inline unsigned long get_pool_index(struct vma_slot *slot,
+					   unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT;
+	if (pool_index >= slot->pool_size)
+		BUG();
+	return pool_index;
+}
+
+static inline unsigned long index_page_offset(unsigned long index)
+{
+	return offset_in_page(sizeof(struct rmap_list_entry *) * index);
+}
+
+static inline
+struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot,
+					    unsigned long index, int need_alloc)
+{
+	unsigned long pool_index;
+	struct page *page;
+	void *addr;
+
+
+	pool_index = get_pool_index(slot, index);
+	if (!slot->rmap_list_pool[pool_index]) {
+		if (!need_alloc)
+			return NULL;
+
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
+		if (!page)
+			return NULL;
+
+		slot->rmap_list_pool[pool_index] = page;
+	}
+
+	addr = kmap(slot->rmap_list_pool[pool_index]);
+	addr += index_page_offset(index);
+
+	return addr;
+}
+
+static inline void put_rmap_list_entry(struct vma_slot *slot,
+				       unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	kunmap(slot->rmap_list_pool[pool_index]);
+}
+
+static inline int entry_is_new(struct rmap_list_entry *entry)
+{
+	return !entry->item;
+}
+
+static inline unsigned long get_index_orig_addr(struct vma_slot *slot,
+						unsigned long index)
+{
+	return slot->vma->vm_start + (index << PAGE_SHIFT);
+}
+
+static inline unsigned long get_entry_address(struct rmap_list_entry *entry)
+{
+	unsigned long addr;
+
+	if (is_addr(entry->addr))
+		addr = get_clean_addr(entry->addr);
+	else if (entry->item)
+		addr = get_rmap_addr(entry->item);
+	else
+		BUG();
+
+	return addr;
+}
+
+static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry)
+{
+	if (is_addr(entry->addr))
+		return NULL;
+
+	return entry->item;
+}
+
+static inline void inc_rmap_list_pool_count(struct vma_slot *slot,
+					    unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	slot->pool_counts[pool_index]++;
+}
+
+static inline void dec_rmap_list_pool_count(struct vma_slot *slot,
+					    unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	BUG_ON(!slot->pool_counts[pool_index]);
+	slot->pool_counts[pool_index]--;
+}
+
+static inline int entry_has_rmap(struct rmap_list_entry *entry)
+{
+	return !is_addr(entry->addr) && entry->item;
+}
+
+static inline void swap_entries(struct rmap_list_entry *entry1,
+				unsigned long index1,
+				struct rmap_list_entry *entry2,
+				unsigned long index2)
+{
+	struct rmap_list_entry tmp;
+
+	/* swapping two new entries is meaningless */
+	BUG_ON(entry_is_new(entry1) && entry_is_new(entry2));
+
+	tmp = *entry1;
+	*entry1 = *entry2;
+	*entry2 = tmp;
+
+	if (entry_has_rmap(entry1))
+		entry1->item->entry_index = index1;
+
+	if (entry_has_rmap(entry2))
+		entry2->item->entry_index = index2;
+
+	if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) {
+		inc_rmap_list_pool_count(entry1->item->slot, index1);
+		dec_rmap_list_pool_count(entry1->item->slot, index2);
+	} else if (!entry_has_rmap(entry1) && entry_has_rmap(entry2)) {
+		inc_rmap_list_pool_count(entry2->item->slot, index2);
+		dec_rmap_list_pool_count(entry2->item->slot, index1);
+	}
+}
+
+static inline void free_entry_item(struct rmap_list_entry *entry)
+{
+	unsigned long index;
+	struct rmap_item *item;
+
+	if (!is_addr(entry->addr)) {
+		BUG_ON(!entry->item);
+		item = entry->item;
+		entry->addr = get_rmap_addr(item);
+		set_is_addr(entry->addr);
+		index = item->entry_index;
+		remove_rmap_item_from_tree(item);
+		dec_rmap_list_pool_count(item->slot, index);
+		free_rmap_item(item);
+	}
+}
+
+static inline int pool_entry_boundary(unsigned long index)
+{
+	unsigned long linear_addr;
+
+	linear_addr = sizeof(struct rmap_list_entry *) * index;
+	return index && !offset_in_page(linear_addr);
+}
+
+static inline void try_free_last_pool(struct vma_slot *slot,
+				      unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	if (slot->rmap_list_pool[pool_index] &&
+	    !slot->pool_counts[pool_index]) {
+		__free_page(slot->rmap_list_pool[pool_index]);
+		slot->rmap_list_pool[pool_index] = NULL;
+		slot->flags |= UKSM_SLOT_NEED_SORT;
+	}
+
+}
+
+static inline unsigned long vma_item_index(struct vm_area_struct *vma,
+					   struct rmap_item *item)
+{
+	return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT;
+}
+
+static int within_same_pool(struct vma_slot *slot,
+			    unsigned long i, unsigned long j)
+{
+	unsigned long pool_i, pool_j;
+
+	pool_i = get_pool_index(slot, i);
+	pool_j = get_pool_index(slot, j);
+
+	return (pool_i == pool_j);
+}
+
+static void sort_rmap_entry_list(struct vma_slot *slot)
+{
+	unsigned long i, j;
+	struct rmap_list_entry *entry, *swap_entry;
+
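+	/*
+	 * Move every entry to its home index (vma_item_index), swapping
+	 * across pool pages when necessary, so the list ends up ordered by
+	 * virtual address again.
+	 */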
+	entry = get_rmap_list_entry(slot, 0, 0);
+	for (i = 0; i < slot->pages; ) {
+
+		if (!entry)
+			goto skip_whole_pool;
+
+		if (entry_is_new(entry))
+			goto next_entry;
+
+		if (is_addr(entry->addr)) {
+			entry->addr = 0;
+			goto next_entry;
+		}
+
+		j = vma_item_index(slot->vma, entry->item);
+		if (j == i)
+			goto next_entry;
+
+		if (within_same_pool(slot, i, j))
+			swap_entry = entry + j - i;
+		else
+			swap_entry = get_rmap_list_entry(slot, j, 1);
+
+		swap_entries(entry, i, swap_entry, j);
+		if (!within_same_pool(slot, i, j))
+			put_rmap_list_entry(slot, j);
+		continue;
+
+skip_whole_pool:
+		i += PAGE_SIZE / sizeof(*entry);
+		if (i < slot->pages)
+			entry = get_rmap_list_entry(slot, i, 0);
+		continue;
+
+next_entry:
+		if (i >= slot->pages - 1 ||
+		    !within_same_pool(slot, i, i + 1)) {
+			put_rmap_list_entry(slot, i);
+			if (i + 1 < slot->pages)
+				entry = get_rmap_list_entry(slot, i + 1, 0);
+		} else
+			entry++;
+		i++;
+		continue;
+	}
+
+	/* free empty pool entries which contain no rmap_item */
+	/* Could be simplified to rely only on pool_counts once known bug-free. */
+	for (i = 0; i < slot->pool_size; i++) {
+		unsigned char has_rmap;
+		void *addr;
+
+		if (!slot->rmap_list_pool[i])
+			continue;
+
+		has_rmap = 0;
+		addr = kmap(slot->rmap_list_pool[i]);
+		BUG_ON(!addr);
+		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
+			entry = (struct rmap_list_entry *)addr + j;
+			if (is_addr(entry->addr))
+				continue;
+			if (!entry->item)
+				continue;
+			has_rmap = 1;
+		}
+		kunmap(slot->rmap_list_pool[i]);
+		if (!has_rmap) {
+			BUG_ON(slot->pool_counts[i]);
+			__free_page(slot->rmap_list_pool[i]);
+			slot->rmap_list_pool[i] = NULL;
+		}
+	}
+
+	slot->flags &= ~UKSM_SLOT_NEED_SORT;
+}
+
+/*
+ * vma_fully_scanned() - return whether all pages in this slot have been scanned.
+ */
+static inline int vma_fully_scanned(struct vma_slot *slot)
+{
+	return slot->pages_scanned == slot->pages;
+}
+
+/**
+ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to
+ * its random permutation. This function also manages the random-permutation
+ * index bookkeeping.
+ */
+static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash)
+{
+	unsigned long rand_range, addr, swap_index, scan_index;
+	struct rmap_item *item = NULL;
+	struct rmap_list_entry *scan_entry, *swap_entry = NULL;
+	struct page *page;
+
+	scan_index = swap_index = slot->pages_scanned % slot->pages;
+
+	if (pool_entry_boundary(scan_index))
+		try_free_last_pool(slot, scan_index - 1);
+
+	if (vma_fully_scanned(slot)) {
+		if (slot->flags & UKSM_SLOT_NEED_SORT)
+			slot->flags |= UKSM_SLOT_NEED_RERAND;
+		else
+			slot->flags &= ~UKSM_SLOT_NEED_RERAND;
+		if (slot->flags & UKSM_SLOT_NEED_SORT)
+			sort_rmap_entry_list(slot);
+	}
+
+	scan_entry = get_rmap_list_entry(slot, scan_index, 1);
+	if (!scan_entry)
+		return NULL;
+
+	if (entry_is_new(scan_entry)) {
+		scan_entry->addr = get_index_orig_addr(slot, scan_index);
+		set_is_addr(scan_entry->addr);
+	}
+
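+	/*
+	 * Pick a random index in the not-yet-scanned range and swap the two
+	 * entries, yielding an on-the-fly random permutation of the scan
+	 * order.
+	 */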
+	if (slot->flags & UKSM_SLOT_NEED_RERAND) {
+		rand_range = slot->pages - scan_index;
+		BUG_ON(!rand_range);
+		swap_index = scan_index + (prandom_u32() % rand_range);
+	}
+
+	if (swap_index != scan_index) {
+		swap_entry = get_rmap_list_entry(slot, swap_index, 1);
+		if (entry_is_new(swap_entry)) {
+			swap_entry->addr = get_index_orig_addr(slot,
+							       swap_index);
+			set_is_addr(swap_entry->addr);
+		}
+		swap_entries(scan_entry, scan_index, swap_entry, swap_index);
+	}
+
+	addr = get_entry_address(scan_entry);
+	item = get_entry_item(scan_entry);
+	BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start);
+
+	page = follow_page(slot->vma, addr, FOLL_GET);
+	if (IS_ERR_OR_NULL(page))
+		goto nopage;
+
+	if (!PageAnon(page) && !page_trans_compound_anon(page))
+		goto putpage;
+
+	/* check whether this is the zero_page pfn or uksm_zero_page */
+	if ((page_to_pfn(page) == zero_pfn)
+			|| (page_to_pfn(page) == uksm_zero_pfn))
+		goto putpage;
+
+	flush_anon_page(slot->vma, page, addr);
+	flush_dcache_page(page);
+
+
+	*hash = page_hash(page, hash_strength, 1);
+	inc_uksm_pages_scanned();
+	/* if the page content is all zero, re-map it to the zero page */
+	if (find_zero_page_hash(hash_strength, *hash)) {
+		if (!cmp_and_merge_zero_page(slot->vma, page)) {
+			slot->pages_merged++;
+			inc_zone_page_state(page, NR_UKSM_ZERO_PAGES);
+			dec_mm_counter(slot->mm, MM_ANONPAGES);
+
+			/* For full-zero pages, no need to create rmap item */
+			goto putpage;
+		} else {
+			inc_rshash_neg(memcmp_cost / 2);
+		}
+	}
+
+	if (!item) {
+		item = alloc_rmap_item();
+		if (item) {
+			/* It has already been zeroed */
+			item->slot = slot;
+			item->address = addr;
+			item->entry_index = scan_index;
+			scan_entry->item = item;
+			inc_rmap_list_pool_count(slot, scan_index);
+		} else
+			goto putpage;
+	}
+
+	BUG_ON(item->slot != slot);
+	/* the page may have changed */
+	item->page = page;
+	put_rmap_list_entry(slot, scan_index);
+	if (swap_entry)
+		put_rmap_list_entry(slot, swap_index);
+	return item;
+
+putpage:
+	put_page(page);
+	page = NULL;
+nopage:
+	/* no page, store addr back and free rmap_item if possible */
+	free_entry_item(scan_entry);
+	put_rmap_list_entry(slot, scan_index);
+	if (swap_entry)
+		put_rmap_list_entry(slot, swap_index);
+	return NULL;
+}
+
+static inline int in_stable_tree(struct rmap_item *rmap_item)
+{
+	return rmap_item->address & STABLE_FLAG;
+}
+
+/**
+ * scan_vma_one_page() - scan the next page in a vma_slot. Called with
+ * mmap_sem locked.
+ */
+static noinline void scan_vma_one_page(struct vma_slot *slot)
+{
+	u32 hash;
+	struct mm_struct *mm;
+	struct rmap_item *rmap_item = NULL;
+	struct vm_area_struct *vma = slot->vma;
+
+	mm = vma->vm_mm;
+	BUG_ON(!mm);
+	BUG_ON(!slot);
+
+	rmap_item = get_next_rmap_item(slot, &hash);
+	if (!rmap_item)
+		goto out1;
+
+	if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item))
+		goto out2;
+
+	cmp_and_merge_page(rmap_item, hash);
+out2:
+	put_page(rmap_item->page);
+out1:
+	slot->pages_scanned++;
+	if (slot->fully_scanned_round != fully_scanned_round)
+		scanned_virtual_pages++;
+
+	if (vma_fully_scanned(slot))
+		slot->fully_scanned_round = fully_scanned_round;
+}
+
+static inline unsigned long rung_get_pages(struct scan_rung *rung)
+{
+	struct slot_tree_node *node;
+
+	if (!rung->vma_root.rnode)
+		return 0;
+
+	node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode);
+
+	return node->size;
+}
+
+#define RUNG_SAMPLED_MIN	3
+
+static inline
+void uksm_calc_rung_step(struct scan_rung *rung,
+			 unsigned long page_time, unsigned long ratio)
+{
+	unsigned long sampled, pages;
+
+	/* will it be fully scanned? */
+	if (!rung->cover_msecs) {
+		rung->step = 1;
+		return;
+	}
+
+	sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE)
+		  * ratio / page_time;
+
+	/*
+	 * Before we finish a scan round and its expensive per-round jobs,
+	 * we need a chance to estimate the per-page time, so the sampled
+	 * number cannot be too small.
+	 */
+	if (sampled < RUNG_SAMPLED_MIN)
+		sampled = RUNG_SAMPLED_MIN;
+
+	pages = rung_get_pages(rung);
+	if (likely(pages > sampled))
+		rung->step = pages / sampled;
+	else
+		rung->step = 1;
+}
+
+static inline int step_need_recalc(struct scan_rung *rung)
+{
+	unsigned long pages, stepmax;
+
+	pages = rung_get_pages(rung);
+	stepmax = pages / RUNG_SAMPLED_MIN;
+
+	return pages && (rung->step > pages ||
+			 (stepmax && rung->step > stepmax));
+}
+
+static inline
+void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc)
+{
+	struct vma_slot *slot;
+
+	if (finished)
+		rung->flags |= UKSM_RUNG_ROUND_FINISHED;
+
+	if (step_recalc || step_need_recalc(rung)) {
+		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
+		BUG_ON(step_need_recalc(rung));
+	}
+
+	slot_iter_index = prandom_u32() % rung->step;
+	BUG_ON(!rung->vma_root.rnode);
+	slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter);
+	BUG_ON(!slot);
+
+	rung->current_scan = slot;
+	rung->current_offset = slot_iter_index;
+}
+
+static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot)
+{
+	return &slot->rung->vma_root;
+}
+
+/*
+ * Return non-zero if the scan position was reset.
+ */
+static int advance_current_scan(struct scan_rung *rung)
+{
+	unsigned short n;
+	struct vma_slot *slot, *next = NULL;
+
+	BUG_ON(!rung->vma_root.num);
+
+	slot = rung->current_scan;
+	n = (slot->pages - rung->current_offset) % rung->step;
+	slot_iter_index = rung->step - n;
+	next = sradix_tree_next(&rung->vma_root, slot->snode,
+				slot->sindex, slot_iter);
+
+	if (next) {
+		rung->current_offset = slot_iter_index;
+		rung->current_scan = next;
+		return 0;
+	} else {
+		reset_current_scan(rung, 1, 0);
+		return 1;
+	}
+}
+
+static inline void rung_rm_slot(struct vma_slot *slot)
+{
+	struct scan_rung *rung = slot->rung;
+	struct sradix_tree_root *root;
+
+	if (rung->current_scan == slot)
+		advance_current_scan(rung);
+
+	root = slot_get_root(slot);
+	sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex);
+	slot->snode = NULL;
+	if (step_need_recalc(rung)) {
+		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
+		BUG_ON(step_need_recalc(rung));
+	}
+
+	/* In case advance_current_scan() loops back to this slot again */
+	if (rung->vma_root.num && rung->current_scan == slot)
+		reset_current_scan(slot->rung, 1, 0);
+}
+
+static inline void rung_add_new_slots(struct scan_rung *rung,
+			struct vma_slot **slots, unsigned long num)
+{
+	int err;
+	struct vma_slot *slot;
+	unsigned long i;
+	struct sradix_tree_root *root = &rung->vma_root;
+
+	err = sradix_tree_enter(root, (void **)slots, num);
+	BUG_ON(err);
+
+	for (i = 0; i < num; i++) {
+		slot = slots[i];
+		slot->rung = rung;
+		BUG_ON(vma_fully_scanned(slot));
+	}
+
+	if (rung->vma_root.num == num)
+		reset_current_scan(rung, 0, 1);
+}
+
+static inline int rung_add_one_slot(struct scan_rung *rung,
+				     struct vma_slot *slot)
+{
+	int err;
+
+	err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1);
+	if (err)
+		return err;
+
+	slot->rung = rung;
+	if (rung->vma_root.num == 1)
+		reset_current_scan(rung, 0, 1);
+
+	return 0;
+}
+
+/*
+ * Return true if the slot is deleted from its rung.
+ */
+static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung)
+{
+	struct scan_rung *old_rung = slot->rung;
+	int err;
+
+	if (old_rung == rung)
+		return 0;
+
+	rung_rm_slot(slot);
+	err = rung_add_one_slot(rung, slot);
+	if (err) {
+		err = rung_add_one_slot(old_rung, slot);
+		WARN_ON(err); /* OOPS, badly OOM, we lost this slot */
+	}
+
+	return 1;
+}
+
+static inline int vma_rung_up(struct vma_slot *slot)
+{
+	struct scan_rung *rung;
+
+	rung = slot->rung;
+	if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1])
+		rung++;
+
+	return vma_rung_enter(slot, rung);
+}
+
+static inline int vma_rung_down(struct vma_slot *slot)
+{
+	struct scan_rung *rung;
+
+	rung = slot->rung;
+	if (slot->rung != &uksm_scan_ladder[0])
+		rung--;
+
+	return vma_rung_enter(slot, rung);
+}
+
+/**
+ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot.
+ */
+static unsigned long cal_dedup_ratio(struct vma_slot *slot)
+{
+	unsigned long ret;
+
+	BUG_ON(slot->pages_scanned == slot->last_scanned);
+
+	ret = slot->pages_merged;
+
+	/* Thrashing area filtering */
+	if (ret && uksm_thrash_threshold) {
+		if (slot->pages_cowed * 100 / slot->pages_merged
+		    > uksm_thrash_threshold) {
+			ret = 0;
+		} else {
+			ret = slot->pages_merged - slot->pages_cowed;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * cal_dedup_ratio_old() - Calculate the deduplication ratio for this slot.
+ */
+static unsigned long cal_dedup_ratio_old(struct vma_slot *slot)
+{
+	unsigned long ret;
+	unsigned long pages_scanned;
+
+	pages_scanned = slot->pages_scanned;
+	if (!pages_scanned) {
+		if (uksm_thrash_threshold)
+			return 0;
+		else
+			pages_scanned = slot->pages_scanned;
+	}
+
+	ret = slot->pages_bemerged * 100 / pages_scanned;
+
+	/* Thrashing area filtering */
+	if (ret && uksm_thrash_threshold) {
+		if (slot->pages_cowed * 100 / slot->pages_bemerged
+		    > uksm_thrash_threshold) {
+			ret = 0;
+		} else {
+			ret = slot->pages_bemerged - slot->pages_cowed;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * stable_node_reinsert() - When the hash_strength has been adjusted, the
+ * stable tree needs to be restructured; this function re-inserts a
+ * stable node.
+ */
+static inline void stable_node_reinsert(struct stable_node *new_node,
+					struct page *page,
+					struct rb_root *root_treep,
+					struct list_head *tree_node_listp,
+					u32 hash)
+{
+	struct rb_node **new = &root_treep->rb_node;
+	struct rb_node *parent = NULL;
+	struct stable_node *stable_node;
+	struct tree_node *tree_node;
+	struct page *tree_page;
+	int cmp;
+
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		/* found a tree node with the same first-level hash value */
+		stable_node_hash_max(new_node, page, hash);
+		if (tree_node->count == 1) {
+			stable_node = rb_entry(tree_node->sub_root.rb_node,
+					       struct stable_node, node);
+			tree_page = get_uksm_page(stable_node, 1, 0);
+			if (tree_page) {
+				stable_node_hash_max(stable_node,
+						      tree_page, hash);
+				put_page(tree_page);
+
+				/* prepare for stable node insertion */
+
+				cmp = hash_cmp(new_node->hash_max,
+						   stable_node->hash_max);
+				parent = &stable_node->node;
+				if (cmp < 0)
+					new = &parent->rb_left;
+				else if (cmp > 0)
+					new = &parent->rb_right;
+				else
+					goto failed;
+
+				goto add_node;
+			} else {
+				/* the only stable_node was deleted, but the
+				 * tree node was not.
+				 */
+				goto tree_node_reuse;
+			}
+		}
+
+		/* well, search the collision subtree */
+		new = &tree_node->sub_root.rb_node;
+		parent = NULL;
+		BUG_ON(!*new);
+		while (*new) {
+			int cmp;
+
+			stable_node = rb_entry(*new, struct stable_node, node);
+
+			cmp = hash_cmp(new_node->hash_max,
+					   stable_node->hash_max);
+
+			if (cmp < 0) {
+				parent = *new;
+				new = &parent->rb_left;
+			} else if (cmp > 0) {
+				parent = *new;
+				new = &parent->rb_right;
+			} else {
+				/* oh, no, still a collision */
+				goto failed;
+			}
+		}
+
+		goto add_node;
+	}
+
+	/* no tree node found */
+	tree_node = alloc_tree_node(tree_node_listp);
+	if (!tree_node) {
+		printk(KERN_ERR "UKSM: memory allocation error!\n");
+		goto failed;
+	} else {
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, root_treep);
+
+tree_node_reuse:
+		/* prepare for stable node insertion */
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+add_node:
+	rb_link_node(&new_node->node, parent, new);
+	rb_insert_color(&new_node->node, &tree_node->sub_root);
+	new_node->tree_node = tree_node;
+	tree_node->count++;
+	return;
+
+failed:
+	/* This can only happen when two nodes have collided
+	 * at both hash levels.
+	 */
+	new_node->tree_node = NULL;
+	return;
+}
+
+static inline void free_all_tree_nodes(struct list_head *list)
+{
+	struct tree_node *node, *tmp;
+
+	list_for_each_entry_safe(node, tmp, list, all_list) {
+		free_tree_node(node);
+	}
+}
+
+/**
+ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash
+ * strength to the current hash_strength. It re-structures the whole tree.
+ */
+static inline void stable_tree_delta_hash(u32 prev_hash_strength)
+{
+	struct stable_node *node, *tmp;
+	struct rb_root *root_new_treep;
+	struct list_head *new_tree_node_listp;
+
+	stable_tree_index = (stable_tree_index + 1) % 2;
+	root_new_treep = &root_stable_tree[stable_tree_index];
+	new_tree_node_listp = &stable_tree_node_list[stable_tree_index];
+	*root_new_treep = RB_ROOT;
+	BUG_ON(!list_empty(new_tree_node_listp));
+
+	/*
+	 * We need the _safe iterator: the node could be removed by get_uksm_page().
+	 */
+	list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) {
+		void *addr;
+		struct page *node_page;
+		u32 hash;
+
+		/*
+		 * We are completely re-structuring the stable nodes to a new
+		 * stable tree. We don't bother unlinking from the old tree or
+		 * touching the old tree_nodes; they will all be freed at once.
+		 */
+		node_page = get_uksm_page(node, 0, 0);
+		if (!node_page)
+			continue;
+
+		if (node->tree_node) {
+			hash = node->tree_node->hash;
+
+			addr = kmap_atomic(node_page);
+
+			hash = delta_hash(addr, prev_hash_strength,
+					  hash_strength, hash);
+			kunmap_atomic(addr);
+		} else {
+			/*
+			 * It was not inserted into the rbtree due to a
+			 * collision in the last scan round.
+			 */
+			hash = page_hash(node_page, hash_strength, 0);
+		}
+
+		stable_node_reinsert(node, node_page, root_new_treep,
+				     new_tree_node_listp, hash);
+		put_page(node_page);
+	}
+
+	root_stable_treep = root_new_treep;
+	free_all_tree_nodes(stable_tree_node_listp);
+	BUG_ON(!list_empty(stable_tree_node_listp));
+	stable_tree_node_listp = new_tree_node_listp;
+}
+
+static inline void inc_hash_strength(unsigned long delta)
+{
+	hash_strength += 1 << delta;
+	if (hash_strength > HASH_STRENGTH_MAX)
+		hash_strength = HASH_STRENGTH_MAX;
+}
+
+static inline void dec_hash_strength(unsigned long delta)
+{
+	unsigned long change = 1 << delta;
+
+	if (hash_strength <= change + 1)
+		hash_strength = 1;
+	else
+		hash_strength -= change;
+}
+
+static inline void inc_hash_strength_delta(void)
+{
+	hash_strength_delta++;
+	if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX)
+		hash_strength_delta = HASH_STRENGTH_DELTA_MAX;
+}
+
+/*
+static inline unsigned long get_current_neg_ratio(void)
+{
+	if (!rshash_pos || rshash_neg > rshash_pos)
+		return 100;
+
+	return div64_u64(100 * rshash_neg , rshash_pos);
+}
+*/
+
+static inline unsigned long get_current_neg_ratio(void)
+{
+	u64 pos = benefit.pos;
+	u64 neg = benefit.neg;
+
+	if (!neg)
+		return 0;
+
+	if (!pos || neg > pos)
+		return 100;
+
+	if (neg > div64_u64(U64_MAX, 100))
+		pos = div64_u64(pos, 100);
+	else
+		neg *= 100;
+
+	return div64_u64(neg, pos);
+}
+
+static inline unsigned long get_current_benefit(void)
+{
+	u64 pos = benefit.pos;
+	u64 neg = benefit.neg;
+	u64 scanned = benefit.scanned;
+
+	if (neg > pos)
+		return 0;
+
+	return div64_u64((pos - neg), scanned);
+}
+
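+/*
+ * Decide whether hash_strength should grow (GO_UP), shrink (GO_DOWN), stay
+ * as is (STILL) or be re-probed (OBSCURE), based on the measured negative
+ * ratio and how far the current benefit has drifted from the last stable
+ * benefit.
+ */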
+static inline int judge_rshash_direction(void)
+{
+	u64 current_neg_ratio, stable_benefit;
+	u64 current_benefit, delta = 0;
+	int ret = STILL;
+
+	/* Probe a value shortly after boot, and periodically in case the
+	 * system stays stable for a long time. */
+	if ((fully_scanned_round & 0xFFULL) == 10) {
+		ret = OBSCURE;
+		goto out;
+	}
+
+	current_neg_ratio = get_current_neg_ratio();
+
+	if (current_neg_ratio == 0) {
+		rshash_neg_cont_zero++;
+		if (rshash_neg_cont_zero > 2)
+			return GO_DOWN;
+		else
+			return STILL;
+	}
+	rshash_neg_cont_zero = 0;
+
+	if (current_neg_ratio > 90) {
+		ret = GO_UP;
+		goto out;
+	}
+
+	current_benefit = get_current_benefit();
+	stable_benefit = rshash_state.stable_benefit;
+
+	if (!stable_benefit) {
+		ret = OBSCURE;
+		goto out;
+	}
+
+	if (current_benefit > stable_benefit)
+		delta = current_benefit - stable_benefit;
+	else if (current_benefit < stable_benefit)
+		delta = stable_benefit - current_benefit;
+
+	delta = div64_u64(100 * delta , stable_benefit);
+
+	if (delta > 50) {
+		rshash_cont_obscure++;
+		if (rshash_cont_obscure > 2)
+			return OBSCURE;
+		else
+			return STILL;
+	}
+
+out:
+	rshash_cont_obscure = 0;
+	return ret;
+}
+
+/**
+ * rshash_adjust() - The main function to control the random sampling state
+ * machine for hash strength adapting.
+ *
+ * return true if hash_strength has changed.
+ */
+static inline int rshash_adjust(void)
+{
+	unsigned long prev_hash_strength = hash_strength;
+
+	if (!encode_benefit())
+		return 0;
+
+	switch (rshash_state.state) {
+	case RSHASH_STILL:
+		switch (judge_rshash_direction()) {
+		case GO_UP:
+			if (rshash_state.pre_direct == GO_DOWN)
+				hash_strength_delta = 0;
+
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.stable_benefit = get_current_benefit();
+			rshash_state.pre_direct = GO_UP;
+			break;
+
+		case GO_DOWN:
+			if (rshash_state.pre_direct == GO_UP)
+				hash_strength_delta = 0;
+
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.stable_benefit = get_current_benefit();
+			rshash_state.pre_direct = GO_DOWN;
+			break;
+
+		case OBSCURE:
+			rshash_state.stable_point = hash_strength;
+			rshash_state.turn_point_down = hash_strength;
+			rshash_state.turn_point_up = hash_strength;
+			rshash_state.turn_benefit_down = get_current_benefit();
+			rshash_state.turn_benefit_up = get_current_benefit();
+			rshash_state.lookup_window_index = 0;
+			rshash_state.state = RSHASH_TRYDOWN;
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			break;
+
+		case STILL:
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case RSHASH_TRYDOWN:
+		if (rshash_state.lookup_window_index++ % 5 == 0)
+			rshash_state.below_count = 0;
+
+		if (get_current_benefit() < rshash_state.stable_benefit)
+			rshash_state.below_count++;
+		else if (get_current_benefit() >
+			 rshash_state.turn_benefit_down) {
+			rshash_state.turn_point_down = hash_strength;
+			rshash_state.turn_benefit_down = get_current_benefit();
+		}
+
+		if (rshash_state.below_count >= 3 ||
+		    judge_rshash_direction() == GO_UP ||
+		    hash_strength == 1) {
+			hash_strength = rshash_state.stable_point;
+			hash_strength_delta = 0;
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.lookup_window_index = 0;
+			rshash_state.state = RSHASH_TRYUP;
+			hash_strength_delta = 0;
+		} else {
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+		}
+		break;
+
+	case RSHASH_TRYUP:
+		if (rshash_state.lookup_window_index++ % 5 == 0)
+			rshash_state.below_count = 0;
+
+		if (get_current_benefit() < rshash_state.turn_benefit_down)
+			rshash_state.below_count++;
+		else if (get_current_benefit() > rshash_state.turn_benefit_up) {
+			rshash_state.turn_point_up = hash_strength;
+			rshash_state.turn_benefit_up = get_current_benefit();
+		}
+
+		if (rshash_state.below_count >= 3 ||
+		    judge_rshash_direction() == GO_DOWN ||
+		    hash_strength == HASH_STRENGTH_MAX) {
+			hash_strength = rshash_state.turn_benefit_up >
+				rshash_state.turn_benefit_down ?
+				rshash_state.turn_point_up :
+				rshash_state.turn_point_down;
+
+			rshash_state.state = RSHASH_PRE_STILL;
+		} else {
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+		}
+
+		break;
+
+	case RSHASH_NEW:
+	case RSHASH_PRE_STILL:
+		rshash_state.stable_benefit = get_current_benefit();
+		rshash_state.state = RSHASH_STILL;
+		hash_strength_delta = 0;
+		break;
+	default:
+		BUG();
+	}
+
+	/* rshash_neg = rshash_pos = 0; */
+	reset_benefit();
+
+	if (prev_hash_strength != hash_strength)
+		stable_tree_delta_hash(prev_hash_strength);
+
+	return prev_hash_strength != hash_strength;
+}
+
+/**
+ * round_update_ladder() - The main function to do update of all the
+ * adjustments whenever a scan round is finished.
+ */
+static noinline void round_update_ladder(void)
+{
+	int i;
+	unsigned long dedup;
+	struct vma_slot *slot, *tmp_slot;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED;
+	}
+
+	list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) {
+
+		/* the slot may have been removed by rung_rm_slot() when its mm exited */
+		if (slot->snode) {
+			dedup = cal_dedup_ratio_old(slot);
+			if (dedup && dedup >= uksm_abundant_threshold)
+				vma_rung_up(slot);
+		}
+
+		slot->pages_bemerged = 0;
+		slot->pages_cowed = 0;
+
+		list_del_init(&slot->dedup_list);
+	}
+}
+
+static void uksm_del_vma_slot(struct vma_slot *slot)
+{
+	int i, j;
+	struct rmap_list_entry *entry;
+
+	if (slot->snode) {
+		/*
+		 * Removal is unnecessary if the slot failed while entering
+		 * the rung (snode would then be NULL).
+		 */
+		rung_rm_slot(slot);
+	}
+
+	if (!list_empty(&slot->dedup_list))
+		list_del(&slot->dedup_list);
+
+	if (!slot->rmap_list_pool || !slot->pool_counts) {
+		/* In case it OOMed in uksm_vma_enter() */
+		goto out;
+	}
+
+	for (i = 0; i < slot->pool_size; i++) {
+		void *addr;
+
+		if (!slot->rmap_list_pool[i])
+			continue;
+
+		addr = kmap(slot->rmap_list_pool[i]);
+		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
+			entry = (struct rmap_list_entry *)addr + j;
+			if (is_addr(entry->addr))
+				continue;
+			if (!entry->item)
+				continue;
+
+			remove_rmap_item_from_tree(entry->item);
+			free_rmap_item(entry->item);
+			slot->pool_counts[i]--;
+		}
+		BUG_ON(slot->pool_counts[i]);
+		kunmap(slot->rmap_list_pool[i]);
+		__free_page(slot->rmap_list_pool[i]);
+	}
+	kfree(slot->rmap_list_pool);
+	kfree(slot->pool_counts);
+
+out:
+	slot->rung = NULL;
+	if (slot->flags & UKSM_SLOT_IN_UKSM) {
+		BUG_ON(uksm_pages_total < slot->pages);
+		uksm_pages_total -= slot->pages;
+	}
+
+	if (slot->fully_scanned_round == fully_scanned_round)
+		scanned_virtual_pages -= slot->pages;
+	else
+		scanned_virtual_pages -= slot->pages_scanned;
+	free_vma_slot(slot);
+}
+
+
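+/*
+ * Slots queued on vma_slot_del are freed in batches of SPIN_LOCK_PERIOD so
+ * that vma_slot_list_lock is never held while uksm_del_vma_slot() runs.
+ */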
+#define SPIN_LOCK_PERIOD	32
+static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD];
+static inline void cleanup_vma_slots(void)
+{
+	struct vma_slot *slot;
+	int i;
+
+	i = 0;
+	spin_lock(&vma_slot_list_lock);
+	while (!list_empty(&vma_slot_del)) {
+		slot = list_entry(vma_slot_del.next,
+				  struct vma_slot, slot_list);
+		list_del(&slot->slot_list);
+		cleanup_slots[i++] = slot;
+		if (i == SPIN_LOCK_PERIOD) {
+			spin_unlock(&vma_slot_list_lock);
+			while (--i >= 0)
+				uksm_del_vma_slot(cleanup_slots[i]);
+			i = 0;
+			spin_lock(&vma_slot_list_lock);
+		}
+	}
+	spin_unlock(&vma_slot_list_lock);
+
+	while (--i >= 0)
+		uksm_del_vma_slot(cleanup_slots[i]);
+}
+
+/*
+ * exponential moving average formula
+ */
+static inline unsigned long ema(unsigned long curr, unsigned long last_ema)
+{
+	/*
+	 * For a very high burst, even the EMA cannot work well: a falsely
+	 * high per-page time estimate feeds back as very high overhead of
+	 * context switching and rung updates -- this then leads to an even
+	 * higher per-page time and may not converge.
+	 *
+	 * Instead, we try to approach this value in a binary manner.
+	 */
+	if (curr > last_ema * 10)
+		return last_ema * 2;
+
+	return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100;
+}
+
+/*
+ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to
+ * nanoseconds based on current uksm_sleep_jiffies.
+ */
+static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio)
+{
+	return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) /
+		(TIME_RATIO_SCALE - ratio) * ratio;
+}
+
+
+static inline unsigned long rung_real_ratio(int cpu_time_ratio)
+{
+	unsigned long ret;
+
+	BUG_ON(!cpu_time_ratio);
+
+	if (cpu_time_ratio > 0)
+		ret = cpu_time_ratio;
+	else
+		ret = (unsigned long)(-cpu_time_ratio) *
+			uksm_max_cpu_percentage / 100UL;
+
+	return ret ? ret : 1;
+}
+
+static noinline void uksm_calc_scan_pages(void)
+{
+	struct scan_rung *ladder = uksm_scan_ladder;
+	unsigned long sleep_usecs, nsecs;
+	unsigned long ratio;
+	int i;
+	unsigned long per_page;
+
+	if (uksm_ema_page_time > 100000 ||
+	    (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL))
+		uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
+
+	per_page = uksm_ema_page_time;
+	BUG_ON(!per_page);
+
+	/*
+	 * Every 8 eval rounds, reset uksm_sleep_jiffies to the value saved
+	 * from user input, so it can be re-probed.
+	 */
+	if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL)
+		uksm_sleep_jiffies = uksm_sleep_saved;
+
+	/* We require each rung to scan at least one page per period. */
+	nsecs = per_page;
+	ratio = rung_real_ratio(ladder[0].cpu_ratio);
+	if (cpu_ratio_to_nsec(ratio) < nsecs) {
+		sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio
+				/ NSEC_PER_USEC;
+		uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		ratio = rung_real_ratio(ladder[i].cpu_ratio);
+		ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) /
+					per_page;
+		BUG_ON(!ladder[i].pages_to_scan);
+		uksm_calc_rung_step(&ladder[i], per_page, ratio);
+	}
+}
+
+/*
+ * Convert the scan time of this round (ns) into the next expected minimum
+ * sleep time (ms), being careful about possible overflows. ratio is taken
+ * from rung_real_ratio().
+ */
+static inline
+unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio)
+{
+	scan_time >>= 20; /* to msec level now */
+	BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE));
+
+	return (unsigned int) ((unsigned long) scan_time *
+			       (TIME_RATIO_SCALE - ratio) / ratio);
+}
+
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+static void uksm_vma_enter(struct vma_slot **slots, unsigned long num)
+{
+	struct scan_rung *rung;
+
+	rung = &uksm_scan_ladder[0];
+	rung_add_new_slots(rung, slots, num);
+}
+
+static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE];
+
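+/*
+ * Move newly registered vma slots from vma_slot_new into the lowest scan
+ * rung in batches, deferring vmas that have no anon_vma yet and parking
+ * those that cannot enter uksm on vma_slot_noadd.
+ */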
+static void uksm_enter_all_slots(void)
+{
+	struct vma_slot *slot;
+	unsigned long index;
+	struct list_head empty_vma_list;
+	int i;
+
+	i = 0;
+	index = 0;
+	INIT_LIST_HEAD(&empty_vma_list);
+
+	spin_lock(&vma_slot_list_lock);
+	while (!list_empty(&vma_slot_new)) {
+		slot = list_entry(vma_slot_new.next,
+				  struct vma_slot, slot_list);
+
+		if (!slot->vma->anon_vma) {
+			list_move(&slot->slot_list, &empty_vma_list);
+		} else if (vma_can_enter(slot->vma)) {
+			batch_slots[index++] = slot;
+			list_del_init(&slot->slot_list);
+		} else {
+			list_move(&slot->slot_list, &vma_slot_noadd);
+		}
+
+		if (++i == SPIN_LOCK_PERIOD ||
+		    (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) {
+			spin_unlock(&vma_slot_list_lock);
+
+			if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) {
+				uksm_vma_enter(batch_slots, index);
+				index = 0;
+			}
+			i = 0;
+			cond_resched();
+			spin_lock(&vma_slot_list_lock);
+		}
+	}
+
+	list_splice(&empty_vma_list, &vma_slot_new);
+
+	spin_unlock(&vma_slot_list_lock);
+
+	if (index)
+		uksm_vma_enter(batch_slots, index);
+
+}
+
+static inline int rung_round_finished(struct scan_rung *rung)
+{
+	return rung->flags & UKSM_RUNG_ROUND_FINISHED;
+}
+
+static inline void judge_slot(struct vma_slot *slot)
+{
+	struct scan_rung *rung = slot->rung;
+	unsigned long dedup;
+	int deleted;
+
+	dedup = cal_dedup_ratio(slot);
+	if (vma_fully_scanned(slot) && uksm_thrash_threshold)
+		deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]);
+	else if (dedup && dedup >= uksm_abundant_threshold)
+		deleted = vma_rung_up(slot);
+	else
+		deleted = vma_rung_down(slot);
+
+	slot->pages_merged = 0;
+	slot->pages_cowed = 0;
+
+	if (vma_fully_scanned(slot))
+		slot->pages_scanned = 0;
+
+	slot->last_scanned = slot->pages_scanned;
+
+	/* If it was deleted above, the rung was already advanced. */
+	if (!deleted)
+		advance_current_scan(rung);
+}
+
+
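+/*
+ * A hash round is considered finished once more than a quarter of all
+ * tracked pages have been scanned since the last one.
+ */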
+static inline int hash_round_finished(void)
+{
+	if (scanned_virtual_pages > (uksm_pages_total >> 2)) {
+		scanned_virtual_pages = 0;
+		if (uksm_pages_scanned)
+			fully_scanned_round++;
+
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+#define UKSM_MMSEM_BATCH	5
+#define BUSY_RETRY		100
+
+/**
+ * uksm_do_scan()  - the main worker function.
+ */
+static noinline void uksm_do_scan(void)
+{
+	struct vma_slot *slot, *iter;
+	struct mm_struct *busy_mm;
+	unsigned char round_finished, all_rungs_empty;
+	int i, err, mmsem_batch;
+	unsigned long pcost;
+	long long delta_exec;
+	unsigned long vpages, max_cpu_ratio;
+	unsigned long long start_time, end_time, scan_time;
+	unsigned int expected_jiffies;
+
+	might_sleep();
+
+	vpages = 0;
+
+	start_time = task_sched_runtime(current);
+	max_cpu_ratio = 0;
+	mmsem_batch = 0;
+
+	for (i = 0; i < SCAN_LADDER_SIZE;) {
+		struct scan_rung *rung = &uksm_scan_ladder[i];
+		unsigned long ratio;
+		int busy_retry;
+
+		if (!rung->pages_to_scan) {
+			i++;
+			continue;
+		}
+
+		if (!rung->vma_root.num) {
+			rung->pages_to_scan = 0;
+			i++;
+			continue;
+		}
+
+		ratio = rung_real_ratio(rung->cpu_ratio);
+		if (ratio > max_cpu_ratio)
+			max_cpu_ratio = ratio;
+
+		busy_retry = BUSY_RETRY;
+		/*
+		 * Do not consider rung_round_finished() here; just use up the
+		 * rung->pages_to_scan quota.
+		 */
+		while (rung->pages_to_scan && rung->vma_root.num &&
+		       likely(!freezing(current))) {
+			int reset = 0;
+
+			slot = rung->current_scan;
+
+			BUG_ON(vma_fully_scanned(slot));
+
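+			/*
+			 * A non-zero mmsem_batch means mmap_sem is still held
+			 * from the previous iteration on this slot's mm.
+			 */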
+			if (mmsem_batch) {
+				err = 0;
+			} else {
+				err = try_down_read_slot_mmap_sem(slot);
+			}
+
+			if (err == -ENOENT) {
+rm_slot:
+				rung_rm_slot(slot);
+				continue;
+			}
+
+			busy_mm = slot->mm;
+
+			if (err == -EBUSY) {
+				/* skip other vmas on the same mm */
+				do {
+					reset = advance_current_scan(rung);
+					iter = rung->current_scan;
+					busy_retry--;
+					if (iter->vma->vm_mm != busy_mm ||
+					    !busy_retry || reset)
+						break;
+				} while (1);
+
+				if (iter->vma->vm_mm != busy_mm) {
+					continue;
+				} else {
+					/* scan round finished */
+					break;
+				}
+			}
+
+			BUG_ON(!vma_can_enter(slot->vma));
+			if (uksm_test_exit(slot->vma->vm_mm)) {
+				mmsem_batch = 0;
+				up_read(&slot->vma->vm_mm->mmap_sem);
+				goto rm_slot;
+			}
+
+			if (mmsem_batch)
+				mmsem_batch--;
+			else
+				mmsem_batch = UKSM_MMSEM_BATCH;
+
+			/* OK, we have taken the mmap_sem, ready to scan */
+			scan_vma_one_page(slot);
+			rung->pages_to_scan--;
+			vpages++;
+
+			if (rung->current_offset + rung->step > slot->pages - 1
+			    || vma_fully_scanned(slot)) {
+				up_read(&slot->vma->vm_mm->mmap_sem);
+				judge_slot(slot);
+				mmsem_batch = 0;
+			} else {
+				rung->current_offset += rung->step;
+				if (!mmsem_batch)
+					up_read(&slot->vma->vm_mm->mmap_sem);
+			}
+
+			busy_retry = BUSY_RETRY;
+			cond_resched();
+		}
+
+		if (mmsem_batch) {
+			up_read(&slot->vma->vm_mm->mmap_sem);
+			mmsem_batch = 0;
+		}
+
+		if (freezing(current))
+			break;
+
+		cond_resched();
+	}
+	end_time = task_sched_runtime(current);
+	delta_exec = end_time - start_time;
+
+	if (freezing(current))
+		return;
+
+	cleanup_vma_slots();
+	uksm_enter_all_slots();
+
+	round_finished = 1;
+	all_rungs_empty = 1;
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		struct scan_rung *rung = &uksm_scan_ladder[i];
+
+		if (rung->vma_root.num) {
+			all_rungs_empty = 0;
+			if (!rung_round_finished(rung))
+				round_finished = 0;
+		}
+	}
+
+	if (all_rungs_empty)
+		round_finished = 0;
+
+	if (round_finished) {
+		round_update_ladder();
+		uksm_eval_round++;
+
+		if (hash_round_finished() && rshash_adjust()) {
+			/* Reset the unstable root iff hash strength changed */
+			uksm_hash_round++;
+			root_unstable_tree = RB_ROOT;
+			free_all_tree_nodes(&unstable_tree_node_list);
+		}
+
+		/*
+		 * A number of pages can hang around indefinitely on per-cpu
+		 * pagevecs, raised page count preventing write_protect_page
+		 * from merging them.  Though it doesn't really matter much,
+		 * it is puzzling to see some stuck in pages_volatile until
+		 * other activity jostles them out, and they also prevented
+		 * LTP's KSM test from succeeding deterministically; so drain
+		 * them here (here rather than on entry to uksm_do_scan(),
+		 * so we don't IPI too often when pages_to_scan is set low).
+		 */
+		lru_add_drain_all();
+	}
+
+
+	if (vpages && delta_exec > 0) {
+		pcost = (unsigned long) delta_exec / vpages;
+		if (likely(uksm_ema_page_time))
+			uksm_ema_page_time = ema(pcost, uksm_ema_page_time);
+		else
+			uksm_ema_page_time = pcost;
+	}
+
+	uksm_calc_scan_pages();
+	uksm_sleep_real = uksm_sleep_jiffies;
+	/* in case of radical cpu bursts, apply the upper bound */
+	end_time = task_sched_runtime(current);
+	if (max_cpu_ratio && end_time > start_time) {
+		scan_time = end_time - start_time;
+		expected_jiffies = msecs_to_jiffies(
+			scan_time_to_sleep(scan_time, max_cpu_ratio));
+
+		if (expected_jiffies > uksm_sleep_real)
+			uksm_sleep_real = expected_jiffies;
+
+		/* We have a 1 second upper bound for responsiveness. */
+		if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC)
+			uksm_sleep_real = msecs_to_jiffies(1000);
+	}
+
+	return;
+}
+
+static int ksmd_should_run(void)
+{
+	return uksm_run & UKSM_RUN_MERGE;
+}
+
+static int uksm_scan_thread(void *nothing)
+{
+	set_freezable();
+	set_user_nice(current, 5);
+
+	while (!kthread_should_stop()) {
+		mutex_lock(&uksm_thread_mutex);
+		if (ksmd_should_run()) {
+			uksm_do_scan();
+		}
+		mutex_unlock(&uksm_thread_mutex);
+
+		try_to_freeze();
+
+		if (ksmd_should_run()) {
+			schedule_timeout_interruptible(uksm_sleep_real);
+			uksm_sleep_times++;
+		} else {
+			wait_event_freezable(uksm_thread_wait,
+				ksmd_should_run() || kthread_should_stop());
+		}
+	}
+	return 0;
+}
+
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
+{
+	struct stable_node *stable_node;
+	struct node_vma *node_vma;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	int search_new_forks = 0;
+	unsigned long address;
+
+	VM_BUG_ON_PAGE(!PageKsm(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return ret;
+again:
+	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
+		hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
+			struct anon_vma *anon_vma = rmap_item->anon_vma;
+			struct anon_vma_chain *vmac;
+			struct vm_area_struct *vma;
+
+			anon_vma_lock_read(anon_vma);
+			anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+						       0, ULONG_MAX) {
+				vma = vmac->vma;
+				address = get_rmap_addr(rmap_item);
+
+				if (address < vma->vm_start ||
+				    address >= vma->vm_end)
+					continue;
+
+				if ((rmap_item->slot->vma == vma) ==
+				    search_new_forks)
+					continue;
+
+				if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+					continue;
+
+				ret = rwc->rmap_one(page, vma, address, rwc->arg);
+				if (ret != SWAP_AGAIN) {
+					anon_vma_unlock_read(anon_vma);
+					goto out;
+				}
+
+				if (rwc->done && rwc->done(page)) {
+					anon_vma_unlock_read(anon_vma);
+					goto out;
+				}
+			}
+			anon_vma_unlock_read(anon_vma);
+		}
+	}
+	if (!search_new_forks++)
+		goto again;
+out:
+	return ret;
+}
+
+#ifdef CONFIG_MIGRATION
+/* Common ksm interface but may be specific to uksm */
+void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+	struct stable_node *stable_node;
+
+	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+	VM_BUG_ON(newpage->mapping != oldpage->mapping);
+
+	stable_node = page_stable_node(newpage);
+	if (stable_node) {
+		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
+		stable_node->kpfn = page_to_pfn(newpage);
+	}
+}
+#endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn,
+						 unsigned long end_pfn)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(root_stable_treep); node; node = rb_next(node)) {
+		struct stable_node *stable_node;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+		if (stable_node->kpfn >= start_pfn &&
+		    stable_node->kpfn < end_pfn)
+			return stable_node;
+	}
+	return NULL;
+}
+
+static int uksm_memory_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	struct stable_node *stable_node;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		/*
+		 * Keep it very simple for now: just lock out ksmd and
+		 * MADV_UNMERGEABLE while any memory is going offline.
+		 * mutex_lock_nested() is necessary because lockdep was alarmed
+		 * that here we take uksm_thread_mutex inside notifier chain
+		 * mutex, and later take notifier chain mutex inside
+		 * uksm_thread_mutex to unlock it.   But that's safe because both
+		 * are inside mem_hotplug_mutex.
+		 */
+		mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING);
+		break;
+
+	case MEM_OFFLINE:
+		/*
+		 * Most of the work is done by page migration; but there might
+		 * be a few stable_nodes left over, still pointing to struct
+		 * pages which have been offlined: prune those from the tree.
+		 */
+		while ((stable_node = uksm_check_stable_tree(mn->start_pfn,
+					mn->start_pfn + mn->nr_pages)) != NULL)
+			remove_node_from_stable_tree(stable_node, 1, 1);
+		/* fallthrough */
+
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&uksm_thread_mutex);
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+#ifdef CONFIG_SYSFS
+/*
+ * This all compiles without CONFIG_SYSFS, but is a waste of space.
+ */
+
+#define UKSM_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define UKSM_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static ssize_t max_cpu_percentage_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_max_cpu_percentage);
+}
+
+static ssize_t max_cpu_percentage_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	unsigned long max_cpu_percentage;
+	int err;
+
+	err = kstrtoul(buf, 10, &max_cpu_percentage);
+	if (err || max_cpu_percentage > 100)
+		return -EINVAL;
+
+	if (max_cpu_percentage == 100)
+		max_cpu_percentage = 99;
+	else if (max_cpu_percentage < 10)
+		max_cpu_percentage = 10;
+
+	uksm_max_cpu_percentage = max_cpu_percentage;
+
+	return count;
+}
+UKSM_ATTR(max_cpu_percentage);
+
+static ssize_t sleep_millisecs_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies));
+}
+
+static ssize_t sleep_millisecs_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	unsigned long msecs;
+	int err;
+
+	err = kstrtoul(buf, 10, &msecs);
+	if (err || msecs > MSEC_PER_SEC)
+		return -EINVAL;
+
+	uksm_sleep_jiffies = msecs_to_jiffies(msecs);
+	uksm_sleep_saved = uksm_sleep_jiffies;
+
+	return count;
+}
+UKSM_ATTR(sleep_millisecs);
+
+
+static ssize_t cpu_governor_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
+	int i;
+
+	buf[0] = '\0';
+	for (i = 0; i < n ; i++) {
+		if (uksm_cpu_governor == i)
+			strcat(buf, "[");
+
+		strcat(buf, uksm_cpu_governor_str[i]);
+
+		if (uksm_cpu_governor == i)
+			strcat(buf, "]");
+
+		strcat(buf, " ");
+	}
+	strcat(buf, "\n");
+
+	return strlen(buf);
+}
+
+static inline void init_performance_values(void)
+{
+	int i;
+	struct scan_rung *rung;
+	struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor;
+
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = uksm_scan_ladder + i;
+		rung->cpu_ratio = preset->cpu_ratio[i];
+		rung->cover_msecs = preset->cover_msecs[i];
+	}
+
+	uksm_max_cpu_percentage = preset->max_cpu;
+}
+
+static ssize_t cpu_governor_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
+
+	for (n--; n >=0 ; n--) {
+		if (!strncmp(buf, uksm_cpu_governor_str[n],
+			     strlen(uksm_cpu_governor_str[n])))
+			break;
+	}
+
+	if (n < 0)
+		return -EINVAL;
+	else
+		uksm_cpu_governor = n;
+
+	init_performance_values();
+
+	return count;
+}
+UKSM_ATTR(cpu_governor);
+
+static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_run);
+}
+
+static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > UINT_MAX)
+		return -EINVAL;
+	if (flags > UKSM_RUN_MERGE)
+		return -EINVAL;
+
+	mutex_lock(&uksm_thread_mutex);
+	if (uksm_run != flags) {
+		uksm_run = flags;
+	}
+	mutex_unlock(&uksm_thread_mutex);
+
+	if (flags & UKSM_RUN_MERGE)
+		wake_up_interruptible(&uksm_thread_wait);
+
+	return count;
+}
+UKSM_ATTR(run);
+
+static ssize_t abundant_threshold_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_abundant_threshold);
+}
+
+static ssize_t abundant_threshold_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > 99)
+		return -EINVAL;
+
+	uksm_abundant_threshold = flags;
+
+	return count;
+}
+UKSM_ATTR(abundant_threshold);
+
+static ssize_t thrash_threshold_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_thrash_threshold);
+}
+
+static ssize_t thrash_threshold_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > 99)
+		return -EINVAL;
+
+	uksm_thrash_threshold = flags;
+
+	return count;
+}
+UKSM_ATTR(thrash_threshold);
+
+static ssize_t cpu_ratios_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	int i, size;
+	struct scan_rung *rung;
+	char *p = buf;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		if (rung->cpu_ratio > 0)
+			size = sprintf(p, "%d ", rung->cpu_ratio);
+		else
+			size = sprintf(p, "MAX/%d ",
+					TIME_RATIO_SCALE / -rung->cpu_ratio);
+
+		p += size;
+	}
+
+	*p++ = '\n';
+	*p = '\0';
+
+	return p - buf;
+}
+
+/*
+ * Expects SCAN_LADDER_SIZE space-separated values, each either a plain
+ * ratio (1..TIME_RATIO_SCALE) or "MAX/<n>", as printed by cpu_ratios_show().
+ */
+static ssize_t cpu_ratios_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int i, cpuratios[SCAN_LADDER_SIZE], err;
+	unsigned long value;
+	struct scan_rung *rung;
+	ssize_t ret = count;
+	char *p, *begin, *end = NULL;
+
+	/* +1 so the last token is always NUL-terminated for kstrtoul() */
+	begin = p = kzalloc(count + 1, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, buf, count);
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		if (i != SCAN_LADDER_SIZE - 1) {
+			end = strchr(p, ' ');
+			if (!end) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			*end = '\0';
+		}
+
+		if (strstr(p, "MAX/")) {
+			p = strchr(p, '/') + 1;
+			err = kstrtoul(p, 10, &value);
+			if (err || value > TIME_RATIO_SCALE || !value) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			cpuratios[i] = - (int) (TIME_RATIO_SCALE / value);
+		} else {
+			err = kstrtoul(p, 10, &value);
+			if (err || value > TIME_RATIO_SCALE || !value) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			cpuratios[i] = value;
+		}
+
+		p = end + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		rung->cpu_ratio = cpuratios[i];
+	}
+
+out:
+	kfree(begin);
+	return ret;
+}
+UKSM_ATTR(cpu_ratios);
+
+static ssize_t eval_intervals_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	int i, size;
+	struct scan_rung *rung;
+	char *p = buf;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+		size = sprintf(p, "%u ", rung->cover_msecs);
+		p += size;
+	}
+
+	*p++ = '\n';
+	*p = '\0';
+
+	return p - buf;
+}
+
+static ssize_t eval_intervals_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int i, err;
+	unsigned long values[SCAN_LADDER_SIZE];
+	struct scan_rung *rung;
+	ssize_t ret = count;
+	char *p, *begin, *end = NULL;
+
+	/* +1 so the last token is always NUL-terminated for kstrtoul() */
+	begin = p = kzalloc(count + 1, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, buf, count);
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		if (i != SCAN_LADDER_SIZE - 1) {
+			end = strchr(p, ' ');
+			if (!end) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			*end = '\0';
+		}
+
+		err = kstrtoul(p, 10, &values[i]);
+		if (err) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		p = end + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		rung->cover_msecs = values[i];
+	}
+
+out:
+	kfree(begin);
+	return ret;
+}
+UKSM_ATTR(eval_intervals);
+
+static ssize_t ema_per_page_time_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_ema_page_time);
+}
+UKSM_ATTR_RO(ema_per_page_time);
+
+static ssize_t pages_shared_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_shared);
+}
+UKSM_ATTR_RO(pages_shared);
+
+static ssize_t pages_sharing_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_sharing);
+}
+UKSM_ATTR_RO(pages_sharing);
+
+static ssize_t pages_unshared_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_unshared);
+}
+UKSM_ATTR_RO(pages_unshared);
+
+static ssize_t full_scans_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", fully_scanned_round);
+}
+UKSM_ATTR_RO(full_scans);
+
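+/*
+ * The cumulative scanned-page count can exceed ULONG_MAX, so it is reported
+ * as "<value> * 2^<base>" once it no longer fits, halving the value and
+ * bumping the exponent as needed to keep the output printable.
+ */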
+static ssize_t pages_scanned_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	unsigned long base = 0;
+	u64 delta, ret;
+
+	if (pages_scanned_stored) {
+		base = pages_scanned_base;
+		ret = pages_scanned_stored;
+		delta = uksm_pages_scanned >> base;
+		if (CAN_OVERFLOW_U64(ret, delta)) {
+			ret >>= 1;
+			delta >>= 1;
+			base++;
+			ret += delta;
+		}
+	} else {
+		ret = uksm_pages_scanned;
+	}
+
+	while (ret > ULONG_MAX) {
+		ret >>= 1;
+		base++;
+	}
+
+	if (base)
+		return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base);
+	else
+		return sprintf(buf, "%lu\n", (unsigned long)ret);
+}
+UKSM_ATTR_RO(pages_scanned);
+
+static ssize_t hash_strength_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", hash_strength);
+}
+UKSM_ATTR_RO(hash_strength);
+
+static ssize_t sleep_times_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", uksm_sleep_times);
+}
+UKSM_ATTR_RO(sleep_times);
+
+
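+/*
+ * These attributes are exported under /sys/kernel/mm/uksm/ (the group is
+ * registered on mm_kobj in uksm_init()).  "run" starts and stops the
+ * scanner, "sleep_millisecs" (at most 1000) and "max_cpu_percentage" bound
+ * its cost, and "cpu_ratios"/"eval_intervals" take one space-separated
+ * value per scan rung, a cpu ratio optionally written as "MAX/<n>".
+ */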
+static struct attribute *uksm_attrs[] = {
+	&max_cpu_percentage_attr.attr,
+	&sleep_millisecs_attr.attr,
+	&cpu_governor_attr.attr,
+	&run_attr.attr,
+	&ema_per_page_time_attr.attr,
+	&pages_shared_attr.attr,
+	&pages_sharing_attr.attr,
+	&pages_unshared_attr.attr,
+	&full_scans_attr.attr,
+	&pages_scanned_attr.attr,
+	&hash_strength_attr.attr,
+	&sleep_times_attr.attr,
+	&thrash_threshold_attr.attr,
+	&abundant_threshold_attr.attr,
+	&cpu_ratios_attr.attr,
+	&eval_intervals_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uksm_attr_group = {
+	.attrs = uksm_attrs,
+	.name = "uksm",
+};
+#endif /* CONFIG_SYSFS */
+
+static inline void init_scan_ladder(void)
+{
+	int i;
+	struct scan_rung *rung;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = uksm_scan_ladder + i;
+		slot_tree_init_root(&rung->vma_root);
+	}
+
+	init_performance_values();
+	uksm_calc_scan_pages();
+}
+
+static inline int cal_positive_negative_costs(void)
+{
+	struct page *p1, *p2;
+	unsigned char *addr1, *addr2;
+	unsigned long i, time_start, hash_cost;
+	unsigned long loopnum = 0;
+
+	/* IMPORTANT: volatile is needed to prevent over-optimization by gcc. */
+	volatile u32 hash;
+	volatile int ret;
+
+	p1 = alloc_page(GFP_KERNEL);
+	if (!p1)
+		return -ENOMEM;
+
+	p2 = alloc_page(GFP_KERNEL);
+	if (!p2) {
+		__free_page(p1);
+		return -ENOMEM;
+	}
+
+	addr1 = kmap_atomic(p1);
+	addr2 = kmap_atomic(p2);
+	memset(addr1, prandom_u32(), PAGE_SIZE);
+	memcpy(addr2, addr1, PAGE_SIZE);
+
+	/* make sure that the two pages differ in the last byte */
+	addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1];
+	kunmap_atomic(addr2);
+	kunmap_atomic(addr1);
+
+	time_start = jiffies;
+	while (jiffies - time_start < 100) {
+		for (i = 0; i < 100; i++)
+			hash = page_hash(p1, HASH_STRENGTH_FULL, 0);
+		loopnum += 100;
+	}
+	hash_cost = (jiffies - time_start);
+
+	time_start = jiffies;
+	for (i = 0; i < loopnum; i++)
+		ret = pages_identical(p1, p2);
+	memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start);
+	memcmp_cost /= hash_cost;
+	printk(KERN_INFO "UKSM: relative memcmp_cost = %lu "
+			 "hash=%u cmp_ret=%d.\n",
+	       memcmp_cost, hash, ret);
+
+	__free_page(p1);
+	__free_page(p2);
+	return 0;
+}
+
+static int init_zeropage_hash_table(void)
+{
+	struct page *page;
+	char *addr;
+	int i;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	addr = kmap_atomic(page);
+	memset(addr, 0, PAGE_SIZE);
+	kunmap_atomic(addr);
+
+	zero_hash_table = kmalloc(HASH_STRENGTH_MAX * sizeof(u32),
+		GFP_KERNEL);
+	if (!zero_hash_table) {
+		__free_page(page);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < HASH_STRENGTH_MAX; i++)
+		zero_hash_table[i] = page_hash(page, i, 0);
+
+	__free_page(page);
+
+	return 0;
+}
+
+static inline int init_random_sampling(void)
+{
+	unsigned long i;
+	random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!random_nums)
+		return -ENOMEM;
+
+	for (i = 0; i < HASH_STRENGTH_FULL; i++)
+		random_nums[i] = i;
+
+	for (i = 0; i < HASH_STRENGTH_FULL; i++) {
+		unsigned long rand_range, swap_index, tmp;
+
+		rand_range = HASH_STRENGTH_FULL - i;
+		swap_index = i + prandom_u32() % rand_range;
+		tmp = random_nums[i];
+		random_nums[i] =  random_nums[swap_index];
+		random_nums[swap_index] = tmp;
+	}
+
+	rshash_state.state = RSHASH_NEW;
+	rshash_state.below_count = 0;
+	rshash_state.lookup_window_index = 0;
+
+	return cal_positive_negative_costs();
+}
+
+static int __init uksm_slab_init(void)
+{
+	rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0);
+	if (!rmap_item_cache)
+		goto out;
+
+	stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0);
+	if (!stable_node_cache)
+		goto out_free1;
+
+	node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0);
+	if (!node_vma_cache)
+		goto out_free2;
+
+	vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0);
+	if (!vma_slot_cache)
+		goto out_free3;
+
+	tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0);
+	if (!tree_node_cache)
+		goto out_free4;
+
+	return 0;
+
+out_free4:
+	kmem_cache_destroy(vma_slot_cache);
+out_free3:
+	kmem_cache_destroy(node_vma_cache);
+out_free2:
+	kmem_cache_destroy(stable_node_cache);
+out_free1:
+	kmem_cache_destroy(rmap_item_cache);
+out:
+	return -ENOMEM;
+}
+
+static void __init uksm_slab_free(void)
+{
+	kmem_cache_destroy(stable_node_cache);
+	kmem_cache_destroy(rmap_item_cache);
+	kmem_cache_destroy(node_vma_cache);
+	kmem_cache_destroy(vma_slot_cache);
+	kmem_cache_destroy(tree_node_cache);
+}
+
+/* Common interface to ksm, but different from it. */
+int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	int err;
+
+	switch (advice) {
+	case MADV_MERGEABLE:
+		return 0;		/* just ignore the advice */
+
+	case MADV_UNMERGEABLE:
+		if (!(*vm_flags & VM_MERGEABLE))
+			return 0;		/* just ignore the advice */
+
+		if (vma->anon_vma) {
+			err = unmerge_uksm_pages(vma, start, end);
+			if (err)
+				return err;
+		}
+
+		uksm_remove_vma(vma);
+		*vm_flags &= ~VM_MERGEABLE;
+		break;
+	}
+
+	return 0;
+}
+
+/* Common interface to ksm, actually the same. */
+struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = page_anon_vma(page);
+	struct page *new_page;
+
+	if (PageKsm(page)) {
+		if (page_stable_node(page))
+			return page;	/* no need to copy it */
+	} else if (!anon_vma) {
+		return page;		/* no need to copy it */
+	} else if (anon_vma->root == vma->anon_vma->root &&
+		 page->index == linear_page_index(vma, address)) {
+		return page;		/* still no need to copy it */
+	}
+	if (!PageUptodate(page))
+		return page;		/* let do_swap_page report the error */
+
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page) {
+		copy_user_highpage(new_page, page, address, vma);
+
+		SetPageDirty(new_page);
+		__SetPageUptodate(new_page);
+		__set_page_locked(new_page);
+	}
+
+	return new_page;
+}
+
+static int __init uksm_init(void)
+{
+	struct task_struct *uksm_thread;
+	int err;
+
+	uksm_sleep_jiffies = msecs_to_jiffies(100);
+	uksm_sleep_saved = uksm_sleep_jiffies;
+
+	slot_tree_init();
+	init_scan_ladder();
+
+
+	err = init_random_sampling();
+	if (err)
+		goto out_free2;
+
+	err = uksm_slab_init();
+	if (err)
+		goto out_free1;
+
+	err = init_zeropage_hash_table();
+	if (err)
+		goto out_free0;
+
+	uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd");
+	if (IS_ERR(uksm_thread)) {
+		printk(KERN_ERR "uksm: creating kthread failed\n");
+		err = PTR_ERR(uksm_thread);
+		goto out_free;
+	}
+
+#ifdef CONFIG_SYSFS
+	err = sysfs_create_group(mm_kobj, &uksm_attr_group);
+	if (err) {
+		printk(KERN_ERR "uksm: register sysfs failed\n");
+		kthread_stop(uksm_thread);
+		goto out_free;
+	}
+#else
+	uksm_run = UKSM_RUN_MERGE;	/* no way for user to start it */
+
+#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	/*
+	 * Choose a high priority since the callback takes uksm_thread_mutex:
+	 * later callbacks could only be taking locks which nest within that.
+	 */
+	hotplug_memory_notifier(uksm_memory_callback, 100);
+#endif
+	return 0;
+
+out_free:
+	kfree(zero_hash_table);
+out_free0:
+	uksm_slab_free();
+out_free1:
+	kfree(random_nums);
+out_free2:
+	kfree(uksm_scan_ladder);
+	return err;
+}
+
+#ifdef MODULE
+subsys_initcall(uksm_init);
+#else
+late_initcall(uksm_init);
+#endif
+
diff -Nur linux-4.2/mm/vmscan.c linux-4.2-pck/mm/vmscan.c
--- linux-4.2/mm/vmscan.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/vmscan.c	2015-09-08 11:20:32.560340751 -0300
@@ -1446,7 +1446,7 @@
 {
 	unsigned long inactive, isolated;
 
-	if (current_is_kswapd())
+	if (current_is_kswapd() || sc->hibernation_mode)
 		return 0;
 
 	if (!sane_reclaim(sc))
@@ -2295,6 +2295,9 @@
 	unsigned long pages_for_compaction;
 	unsigned long inactive_lru_pages;
 
+	if (nr_reclaimed && nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed)
+		return true;
+
 	/* If not in reclaim/compaction mode, stop */
 	if (!in_reclaim_compaction(sc))
 		return false;
@@ -2605,6 +2608,12 @@
 	unsigned long total_scanned = 0;
 	unsigned long writeback_threshold;
 	bool zones_reclaimable;
+
+#ifdef CONFIG_FREEZER
+	if (unlikely(pm_freezing && !sc->hibernation_mode))
+		return 0;
+#endif
+
 retry:
 	delayacct_freepages_start();
 
@@ -3488,6 +3497,11 @@
 	if (!populated_zone(zone))
 		return;
 
+#ifdef CONFIG_FREEZER
+	if (pm_freezing)
+		return;
+#endif
+
 	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
 		return;
 	pgdat = zone->zone_pgdat;
@@ -3513,7 +3527,7 @@
  * LRU order by reclaiming preferentially
  * inactive > active > active referenced > active mapped
  */
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
 {
 	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
@@ -3542,6 +3556,11 @@
 
 	return nr_reclaimed;
 }
+
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+{
+	return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
+}
 #endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
diff -Nur linux-4.2/mm/vmstat.c linux-4.2-pck/mm/vmstat.c
--- linux-4.2/mm/vmstat.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/mm/vmstat.c	2015-09-08 00:51:03.470667662 -0300
@@ -740,6 +740,9 @@
 	"nr_anon_transparent_hugepages",
 	"nr_free_cma",
 
+#ifdef CONFIG_UKSM
+	"nr_uksm_zero_pages",
+#endif
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
 	"nr_dirty_background_threshold",
diff -Nur linux-4.2/net/core/secure_seq.c linux-4.2-pck/net/core/secure_seq.c
--- linux-4.2/net/core/secure_seq.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/core/secure_seq.c	2015-09-08 00:48:31.501257347 -0300
@@ -8,7 +8,11 @@
 #include <linux/ktime.h>
 #include <linux/string.h>
 #include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 
+#include <net/tcp.h>
 #include <net/secure_seq.h>
 
 #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
@@ -39,6 +43,102 @@
 }
 #endif
 
+#ifdef CONFIG_TCP_STEALTH
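+/*
+ * Derive the stealth ISN: the destination address, the (optional) payload
+ * integrity hash, the timestamp value and the destination port are folded
+ * into an MD5 transform keyed with the per-socket shared secret, and the
+ * digest words are XORed down to a 32-bit sequence number.
+ */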
+u32 tcp_stealth_sequence_number(struct sock *sk, __be32 *daddr,
+				u32 daddr_size, __be16 dport)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_key *md5;
+
+	__u32 sec[MD5_MESSAGE_BYTES / sizeof(__u32)];
+	__u32 i;
+	__u32 tsval = 0;
+
+	__be32 iv[MD5_DIGEST_WORDS] = { 0 };
+	__be32 isn;
+
+	memcpy(iv, daddr, (daddr_size > sizeof(iv)) ? sizeof(iv) : daddr_size);
+
+#ifdef CONFIG_TCP_MD5SIG
+	md5 = tp->af_specific->md5_lookup(sk, sk);
+#else
+	md5 = NULL;
+#endif
+	if (likely(sysctl_tcp_timestamps && !md5) || tp->stealth.saw_tsval)
+		tsval = tp->stealth.mstamp.stamp_jiffies;
+
+	((__be16 *)iv)[2] ^= cpu_to_be16(tp->stealth.integrity_hash);
+	iv[2] ^= cpu_to_be32(tsval);
+	((__be16 *)iv)[6] ^= dport;
+
+	for (i = 0; i < MD5_DIGEST_WORDS; i++)
+		iv[i] = le32_to_cpu(iv[i]);
+	for (i = 0; i < MD5_MESSAGE_BYTES / sizeof(__le32); i++)
+		sec[i] = le32_to_cpu(((__le32 *)tp->stealth.secret)[i]);
+
+	md5_transform(iv, sec);
+
+	isn = cpu_to_be32(iv[0]) ^ cpu_to_be32(iv[1]) ^
+	      cpu_to_be32(iv[2]) ^ cpu_to_be32(iv[3]);
+
+	if (tp->stealth.mode & TCP_STEALTH_MODE_INTEGRITY)
+		be32_isn_to_be16_ih(isn) =
+			cpu_to_be16(tp->stealth.integrity_hash);
+
+	return be32_to_cpu(isn);
+}
+EXPORT_SYMBOL(tcp_stealth_sequence_number);
+
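+/*
+ * Validate an incoming SYN: recompute the expected ISN from the shared
+ * secret and the packet's address, port and timestamp data, and compare
+ * it with the ISN the client sent (only the upper 16 bits when integrity
+ * mode reserves the lower half).  Returns 0 if the peer authenticated.
+ */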
+u32 tcp_stealth_do_auth(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcphdr *th = tcp_hdr(skb);
+	__be32 isn = th->seq;
+	__be32 hash;
+	__be32 *daddr;
+	u32 daddr_size;
+
+	tp->stealth.saw_tsval =
+		tcp_parse_tsval_option(&tp->stealth.mstamp.stamp_jiffies, th);
+
+	if (tp->stealth.mode & TCP_STEALTH_MODE_INTEGRITY_LEN)
+		tp->stealth.integrity_hash =
+			be16_to_cpu(be32_isn_to_be16_ih(isn));
+
+	switch (tp->inet_conn.icsk_inet.sk.sk_family) {
+#if IS_ENABLED(CONFIG_IPV6)
+	case PF_INET6:
+		daddr_size = sizeof(ipv6_hdr(skb)->daddr.s6_addr32);
+		daddr = ipv6_hdr(skb)->daddr.s6_addr32;
+	break;
+#endif
+	case PF_INET:
+		daddr_size = sizeof(ip_hdr(skb)->daddr);
+		daddr = &ip_hdr(skb)->daddr;
+	break;
+	default:
+		pr_err("TCP Stealth: Unknown network layer protocol, stop!\n");
+		return 1;
+	}
+
+	hash = tcp_stealth_sequence_number(sk, daddr, daddr_size, th->dest);
+	cpu_to_be32s(&hash);
+
+	if (tp->stealth.mode & TCP_STEALTH_MODE_AUTH &&
+	    tp->stealth.mode & TCP_STEALTH_MODE_INTEGRITY_LEN &&
+	    be32_isn_to_be16_av(isn) == be32_isn_to_be16_av(hash))
+		return 0;
+
+	if (tp->stealth.mode & TCP_STEALTH_MODE_AUTH &&
+	    !(tp->stealth.mode & TCP_STEALTH_MODE_INTEGRITY_LEN) &&
+	    isn == hash)
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL(tcp_stealth_do_auth);
+#endif
+
 #if IS_ENABLED(CONFIG_IPV6)
 __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
diff -Nur linux-4.2/net/ethernet/eth.c linux-4.2-pck/net/ethernet/eth.c
--- linux-4.2/net/ethernet/eth.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ethernet/eth.c	2015-09-08 00:48:22.241696581 -0300
@@ -355,7 +355,7 @@
 	dev->hard_header_len 	= ETH_HLEN;
 	dev->mtu		= ETH_DATA_LEN;
 	dev->addr_len		= ETH_ALEN;
-	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */
+	dev->tx_queue_len	= 50;	/* Ethernet wants good latency.  Use FreeBSD defaults. */
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 	dev->priv_flags		|= IFF_TX_SKB_SHARING;
 
diff -Nur linux-4.2/net/ipv4/Kconfig linux-4.2-pck/net/ipv4/Kconfig
--- linux-4.2/net/ipv4/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv4/Kconfig	2015-09-08 00:48:31.501257347 -0300
@@ -653,6 +653,9 @@
 	config DEFAULT_VEGAS
 		bool "Vegas" if TCP_CONG_VEGAS=y
 
+	config DEFAULT_YEAH
+		bool "YeAH" if TCP_CONG_YEAH=y
+
 	config DEFAULT_VENO
 		bool "Veno" if TCP_CONG_VENO=y
 
@@ -683,6 +686,7 @@
 	default "htcp" if DEFAULT_HTCP
 	default "hybla" if DEFAULT_HYBLA
 	default "vegas" if DEFAULT_VEGAS
+	default "yeah" if DEFAULT_YEAH
 	default "westwood" if DEFAULT_WESTWOOD
 	default "veno" if DEFAULT_VENO
 	default "reno" if DEFAULT_RENO
@@ -700,3 +704,13 @@
 	  on the Internet.
 
 	  If unsure, say N.
+
+config TCP_STEALTH
+	bool "TCP: Stealth TCP socket support"
+	default n
+	---help---
+	  This option enables support for stealth TCP sockets: a listening
+	  socket configured with a shared secret only accepts connections
+	  whose SYN carries an ISN derived from that secret, and it can also
+	  require an integrity hash over the first payload bytes. If you do
+	  not know what this means, you do not need it.
+
+	  If unsure, say N.
+
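For illustration only (not part of the patch): a minimal sketch of the
userspace side of this option. The 64-byte secret length follows from
MD5_MESSAGE_BYTES in the setsockopt handler below; the numeric value of
TCP_STEALTH is an assumption and should be taken from the patched
uapi/linux/tcp.h instead.

	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>

	#ifndef TCP_STEALTH
	#define TCP_STEALTH 26	/* assumed value; use the patched uapi header */
	#endif

	/* Set the shared secret, then connect; the kernel encodes the secret
	 * into the SYN's initial sequence number (see tcp_ipv4.c below). */
	static int stealth_connect(int fd, const struct sockaddr *addr,
				   socklen_t alen, const char *secret)
	{
		char key[64] = { 0 };	/* MD5_MESSAGE_BYTES, zero-padded */

		strncpy(key, secret, sizeof(key));
		if (setsockopt(fd, IPPROTO_TCP, TCP_STEALTH, key, sizeof(key)) < 0)
			return -1;
		return connect(fd, addr, alen);
	}

A server sets the same option on its listening socket, so that the patched
tcp_v4_do_rcv()/tcp_v6_do_rcv() below reset connections whose SYN does not
carry a matching ISN.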
diff -Nur linux-4.2/net/ipv4/tcp.c linux-4.2-pck/net/ipv4/tcp.c
--- linux-4.2/net/ipv4/tcp.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv4/tcp.c	2015-09-08 14:26:11.173759241 -0300
@@ -269,6 +269,7 @@
 #include <linux/crypto.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -2313,6 +2314,43 @@
 	return 0;
 }
 
+#ifdef CONFIG_TCP_STEALTH
+int tcp_stealth_integrity(__be16 *hash, u8 *secret, u8 *payload, int len)
+{
+	struct scatterlist sg[2];
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
+	__be16 h[MD5_DIGEST_WORDS * 2];
+	int i;
+	int err = 0;
+
+	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		err = -PTR_ERR(tfm);
+		goto out;
+	}
+	desc.tfm = tfm;
+	desc.flags = 0;
+
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], secret, MD5_MESSAGE_BYTES);
+	sg_set_buf(&sg[1], payload, len);
+
+	if (crypto_hash_digest(&desc, sg, MD5_MESSAGE_BYTES + len, (u8 *)h)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	*hash = be16_to_cpu(h[0]);
+	for (i = 1; i < MD5_DIGEST_WORDS * 2; i++)
+		*hash ^= be16_to_cpu(h[i]);
+
+out:
+	crypto_free_hash(tfm);
+	return err;
+}
+#endif
+
 /*
  *	Socket option code for TCP.
  */
@@ -2343,6 +2381,66 @@
 		release_sock(sk);
 		return err;
 	}
+#ifdef CONFIG_TCP_STEALTH
+	case TCP_STEALTH: {
+		u8 secret[MD5_MESSAGE_BYTES] = { 0 };
+
+		val = copy_from_user(secret, optval,
+				     min_t(unsigned int, optlen,
+					   MD5_MESSAGE_BYTES));
+		if (val != 0)
+			return -EFAULT;
+
+		lock_sock(sk);
+		memcpy(tp->stealth.secret, secret, MD5_MESSAGE_BYTES);
+		tp->stealth.mode = TCP_STEALTH_MODE_AUTH;
+		tp->stealth.mstamp.v64 = 0;
+		tp->stealth.saw_tsval = false;
+		release_sock(sk);
+		return err;
+	}
+	case TCP_STEALTH_INTEGRITY: {
+		u8 *payload;
+
+		lock_sock(sk);
+
+		if (!(tp->stealth.mode & TCP_STEALTH_MODE_AUTH)) {
+			err = -EOPNOTSUPP;
+			goto stealth_integrity_out_1;
+		}
+
+		if (optlen < 1 || optlen > USHRT_MAX) {
+			err = -EINVAL;
+			goto stealth_integrity_out_1;
+		}
+
+		payload = vmalloc(optlen);
+		if (!payload) {
+			err = -ENOMEM;
+			goto stealth_integrity_out_1;
+		}
+
+		val = copy_from_user(payload, optval, optlen);
+		if (val != 0) {
+			err = -EFAULT;
+			goto stealth_integrity_out_2;
+		}
+
+		err = tcp_stealth_integrity(&tp->stealth.integrity_hash,
+					    tp->stealth.secret, payload,
+					    optlen);
+		if (err)
+			goto stealth_integrity_out_2;
+
+		tp->stealth.mode |= TCP_STEALTH_MODE_INTEGRITY;
+
+stealth_integrity_out_2:
+		vfree(payload);
+stealth_integrity_out_1:
+		release_sock(sk);
+		return err;
+	}
+#endif
 	default:
 		/* fallthru */
 		break;
@@ -2594,6 +2692,18 @@
 		tp->notsent_lowat = val;
 		sk->sk_write_space(sk);
 		break;
+#ifdef CONFIG_TCP_STEALTH
+	case TCP_STEALTH_INTEGRITY_LEN:
+		if (!(tp->stealth.mode & TCP_STEALTH_MODE_AUTH)) {
+			err = -EOPNOTSUPP;
+		} else if (val < 1 || val > USHRT_MAX) {
+			err = -EINVAL;
+		} else {
+			tp->stealth.integrity_len = val;
+			tp->stealth.mode |= TCP_STEALTH_MODE_INTEGRITY_LEN;
+		}
+		break;
+#endif
 	default:
 		err = -ENOPROTOOPT;
 		break;
diff -Nur linux-4.2/net/ipv4/tcp_input.c linux-4.2-pck/net/ipv4/tcp_input.c
--- linux-4.2/net/ipv4/tcp_input.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv4/tcp_input.c	2015-09-08 00:48:31.504590523 -0300
@@ -77,6 +77,9 @@
 #include <linux/errqueue.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
+#ifdef CONFIG_TCP_STEALTH
+EXPORT_SYMBOL(sysctl_tcp_timestamps);
+#endif
 int sysctl_tcp_window_scaling __read_mostly = 1;
 int sysctl_tcp_sack __read_mostly = 1;
 int sysctl_tcp_fack __read_mostly = 1;
@@ -3796,6 +3799,47 @@
 	return true;
 }
 
+#ifdef CONFIG_TCP_STEALTH
+/* Parse only the TSVal field of the TCP Timestamp option header.
+ */
+const bool tcp_parse_tsval_option(u32 *tsval, const struct tcphdr *th)
+{
+	int length = (th->doff << 2) - sizeof(*th);
+	const u8 *ptr = (const u8 *)(th + 1);
+
+	/* If the TCP option is too short, we can short cut */
+	if (length < TCPOLEN_TIMESTAMP)
+		return false;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return false;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		case TCPOPT_TIMESTAMP:
+			opsize = *ptr++;
+			if (opsize != TCPOLEN_TIMESTAMP || opsize > length)
+				return false;
+			*tsval = get_unaligned_be32(ptr);
+			return true;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length)
+				return false;
+		}
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+	return false;
+}
+EXPORT_SYMBOL(tcp_parse_tsval_option);
+#endif
+
 #ifdef CONFIG_TCP_MD5SIG
 /*
  * Parse MD5 Signature option
@@ -4465,6 +4509,31 @@
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_TCP_STEALTH
+static int __tcp_stealth_integrity_check(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcphdr *th = tcp_hdr(skb);
+	struct tcp_sock *tp = tcp_sk(sk);
+	u16 hash;
+	__be32 seq = cpu_to_be32(TCP_SKB_CB(skb)->seq - 1);
+	char *data = skb->data + th->doff * 4;
+	int len = skb->len - th->doff * 4;
+
+	if (len < tp->stealth.integrity_len)
+		return 1;
+
+	if (tcp_stealth_integrity(&hash, tp->stealth.secret, data,
+				  tp->stealth.integrity_len))
+		return 1;
+
+	if (be32_isn_to_be16_ih(seq) != cpu_to_be16(hash))
+		return 1;
+
+	tp->stealth.mode &= ~TCP_STEALTH_MODE_INTEGRITY_LEN;
+	return 0;
+}
+#endif
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4474,6 +4543,14 @@
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
+#ifdef CONFIG_TCP_STEALTH
+	if (unlikely(tp->stealth.mode & TCP_STEALTH_MODE_INTEGRITY_LEN) &&
+	    __tcp_stealth_integrity_check(sk, skb)) {
+		tcp_reset(sk);
+		goto drop;
+	}
+#endif
+
 	skb_dst_drop(skb);
 	__skb_pull(skb, tcp_hdr(skb)->doff * 4);
 
@@ -5246,6 +5323,15 @@
 			int eaten = 0;
 			bool fragstolen = false;
 
+#ifdef CONFIG_TCP_STEALTH
+			if (unlikely(tp->stealth.mode &
+				     TCP_STEALTH_MODE_INTEGRITY_LEN) &&
+			    __tcp_stealth_integrity_check(sk, skb)) {
+				tcp_reset(sk);
+				goto discard;
+			}
+#endif
+
 			if (tp->ucopy.task == current &&
 			    tp->copied_seq == tp->rcv_nxt &&
 			    len - tcp_header_len <= tp->ucopy.len &&
diff -Nur linux-4.2/net/ipv4/tcp_ipv4.c linux-4.2-pck/net/ipv4/tcp_ipv4.c
--- linux-4.2/net/ipv4/tcp_ipv4.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv4/tcp_ipv4.c	2015-09-08 00:48:31.504590523 -0300
@@ -75,6 +75,7 @@
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
 #include <net/busy_poll.h>
+#include <net/secure_seq.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -235,6 +236,21 @@
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
 
+#ifdef CONFIG_TCP_STEALTH
+	/* If CONFIG_TCP_STEALTH is defined, the timestamp must be known as
+	 * early as possible, so the snapshot of tcp_time_stamp is taken
+	 * here.
+	 */
+	skb_mstamp_get(&tp->stealth.mstamp);
+
+	if (!tp->write_seq && likely(!tp->repair) &&
+	    unlikely(tp->stealth.mode & TCP_STEALTH_MODE_AUTH))
+		tp->write_seq = tcp_stealth_sequence_number(sk,
+					&inet->inet_daddr,
+					sizeof(inet->inet_daddr),
+					usin->sin_port);
+#endif
+
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 							   inet->inet_daddr,
@@ -1382,6 +1398,8 @@
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcphdr *th = tcp_hdr(skb);
 	struct sock *rsk;
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1403,6 +1421,15 @@
 	if (tcp_checksum_complete(skb))
 		goto csum_err;
 
+#ifdef CONFIG_TCP_STEALTH
+	if (sk->sk_state == TCP_LISTEN && th->syn && !th->fin &&
+	    unlikely(tp->stealth.mode & TCP_STEALTH_MODE_AUTH) &&
+	    tcp_stealth_do_auth(sk, skb)) {
+		rsk = sk;
+		goto reset;
+	}
+#endif
+
 	if (sk->sk_state == TCP_LISTEN) {
 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
 		if (!nsk)
diff -Nur linux-4.2/net/ipv4/tcp_output.c linux-4.2-pck/net/ipv4/tcp_output.c
--- linux-4.2/net/ipv4/tcp_output.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv4/tcp_output.c	2015-09-08 00:48:31.504590523 -0300
@@ -939,6 +939,13 @@
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
+#ifdef CONFIG_TCP_STEALTH
+	if (unlikely(tcb->tcp_flags & TCPHDR_SYN &&
+		     tp->stealth.mode & TCP_STEALTH_MODE_AUTH)) {
+		skb->skb_mstamp = tp->stealth.mstamp;
+	}
+#endif
+
 	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
 	else
@@ -3251,7 +3258,15 @@
 		return -ENOBUFS;
 
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
+#ifdef CONFIG_TCP_STEALTH
+	/* The timestamp was already taken at the time the ISN was generated,
+	 * as we need to know its value in the tcp_stealth_sequence_number()
+	 * function.
+	 */
+	tp->retrans_stamp = tp->stealth.mstamp.stamp_jiffies;
+#else
 	tp->retrans_stamp = tcp_time_stamp;
+#endif
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
 
diff -Nur linux-4.2/net/ipv6/tcp_ipv6.c linux-4.2-pck/net/ipv6/tcp_ipv6.c
--- linux-4.2/net/ipv6/tcp_ipv6.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/ipv6/tcp_ipv6.c	2015-09-08 00:48:31.504590523 -0300
@@ -63,6 +63,7 @@
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
 #include <net/busy_poll.h>
+#include <net/secure_seq.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -278,6 +279,21 @@
 
 	ip6_set_txhash(sk);
 
+#ifdef CONFIG_TCP_STEALTH
+	/* If CONFIG_TCP_STEALTH is defined, the timestamp must be known as
+	 * early as possible, so the snapshot of tcp_time_stamp is taken
+	 * here.
+	 */
+	skb_mstamp_get(&tp->stealth.mstamp);
+
+	if (!tp->write_seq && likely(!tp->repair) &&
+	    unlikely(tp->stealth.mode & TCP_STEALTH_MODE_AUTH))
+		tp->write_seq = tcp_stealth_sequence_number(sk,
+					sk->sk_v6_daddr.s6_addr32,
+					sizeof(sk->sk_v6_daddr),
+					inet->inet_dport);
+#endif
+
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 							     sk->sk_v6_daddr.s6_addr32,
@@ -1191,7 +1207,8 @@
 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcp_sock *tp;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcphdr *th = tcp_hdr(skb);
 	struct sk_buff *opt_skb = NULL;
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
@@ -1251,6 +1268,13 @@
 	if (tcp_checksum_complete(skb))
 		goto csum_err;
 
+#ifdef CONFIG_TCP_STEALTH
+	if (sk->sk_state == TCP_LISTEN && th->syn && !th->fin &&
+	    tp->stealth.mode & TCP_STEALTH_MODE_AUTH &&
+	    tcp_stealth_do_auth(sk, skb))
+		goto reset;
+#endif
+
 	if (sk->sk_state == TCP_LISTEN) {
 		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
 		if (!nsk)
diff -Nur linux-4.2/net/netfilter/nf_conntrack_core.c linux-4.2-pck/net/netfilter/nf_conntrack_core.c
--- linux-4.2/net/netfilter/nf_conntrack_core.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/netfilter/nf_conntrack_core.c	2015-09-08 00:48:22.241696581 -0300
@@ -320,12 +320,13 @@
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 
-static void nf_ct_tmpl_free(struct nf_conn *tmpl)
+void nf_ct_tmpl_free(struct nf_conn *tmpl)
 {
 	nf_ct_ext_destroy(tmpl);
 	nf_ct_ext_free(tmpl);
 	kfree(tmpl);
 }
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
diff -Nur linux-4.2/net/netfilter/nf_synproxy_core.c linux-4.2-pck/net/netfilter/nf_synproxy_core.c
--- linux-4.2/net/netfilter/nf_synproxy_core.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/netfilter/nf_synproxy_core.c	2015-09-08 00:48:22.241696581 -0300
@@ -378,7 +378,7 @@
 err3:
 	free_percpu(snet->stats);
 err2:
-	nf_conntrack_free(ct);
+	nf_ct_tmpl_free(ct);
 err1:
 	return err;
 }
diff -Nur linux-4.2/net/netfilter/xt_CT.c linux-4.2-pck/net/netfilter/xt_CT.c
--- linux-4.2/net/netfilter/xt_CT.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/net/netfilter/xt_CT.c	2015-09-08 00:48:22.241696581 -0300
@@ -233,7 +233,7 @@
 	return 0;
 
 err3:
-	nf_conntrack_free(ct);
+	nf_ct_tmpl_free(ct);
 err2:
 	nf_ct_l3proto_module_put(par->family);
 err1:
diff -Nur linux-4.2/samples/Kconfig linux-4.2-pck/samples/Kconfig
--- linux-4.2/samples/Kconfig	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/samples/Kconfig	2015-09-08 00:48:22.241696581 -0300
@@ -55,6 +55,13 @@
 	  Build an example of how to dynamically add the hello
 	  command to the kdb shell.
 
+config SAMPLE_KDBUS
+	bool "Build kdbus API example"
+	depends on KDBUS
+	help
+	  Build an example of how the kdbus API can be used from
+	  userspace.
+
 config SAMPLE_RPMSG_CLIENT
 	tristate "Build rpmsg client sample -- loadable modules only"
 	depends on RPMSG && m
diff -Nur linux-4.2/samples/Makefile linux-4.2-pck/samples/Makefile
--- linux-4.2/samples/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/samples/Makefile	2015-09-08 00:48:22.241696581 -0300
@@ -1,4 +1,5 @@
 # Makefile for Linux samples code
 
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
-			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
+			   hw_breakpoint/ kfifo/ kdb/ kdbus/ hidraw/ rpmsg/ \
+			   seccomp/
diff -Nur linux-4.2/samples/kdbus/.gitignore linux-4.2-pck/samples/kdbus/.gitignore
--- linux-4.2/samples/kdbus/.gitignore	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/samples/kdbus/.gitignore	2015-09-08 00:48:22.241696581 -0300
@@ -0,0 +1 @@
+kdbus-workers
diff -Nur linux-4.2/samples/kdbus/Makefile linux-4.2-pck/samples/kdbus/Makefile
--- linux-4.2/samples/kdbus/Makefile	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/samples/kdbus/Makefile	2015-09-08 00:48:22.241696581 -0300
@@ -0,0 +1,9 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+hostprogs-$(CONFIG_SAMPLE_KDBUS) += kdbus-workers
+
+always := $(hostprogs-y)
+
+HOSTCFLAGS_kdbus-workers.o += -I$(objtree)/usr/include
+HOSTLOADLIBES_kdbus-workers := -lrt
diff -Nur linux-4.2/samples/kdbus/kdbus-api.h linux-4.2-pck/samples/kdbus/kdbus-api.h
--- linux-4.2/samples/kdbus/kdbus-api.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/samples/kdbus/kdbus-api.h	2015-09-08 00:48:22.241696581 -0300
@@ -0,0 +1,114 @@
+#ifndef KDBUS_API_H
+#define KDBUS_API_H
+
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
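+/*
+ * Thin ioctl wrappers for the kdbus commands: each returns 0 on success or
+ * a negative errno value, so callers can uniformly check for "r < 0".
+ */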
+#define KDBUS_ALIGN8(l) (((l) + 7) & ~7)
+#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
+#define KDBUS_ITEM_SIZE(s) KDBUS_ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE)
+#define KDBUS_ITEM_NEXT(item) \
+	(typeof(item))((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+#define KDBUS_FOREACH(iter, first, _size)				\
+	for ((iter) = (first);						\
+	     ((uint8_t *)(iter) < (uint8_t *)(first) + (_size)) &&	\
+	       ((uint8_t *)(iter) >= (uint8_t *)(first));		\
+	     (iter) = (void *)((uint8_t *)(iter) + KDBUS_ALIGN8((iter)->size)))
+
+static inline int kdbus_cmd_bus_make(int control_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(control_fd, KDBUS_CMD_BUS_MAKE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_endpoint_make(int bus_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(bus_fd, KDBUS_CMD_ENDPOINT_MAKE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_endpoint_update(int ep_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(ep_fd, KDBUS_CMD_ENDPOINT_UPDATE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_hello(int bus_fd, struct kdbus_cmd_hello *cmd)
+{
+	int ret = ioctl(bus_fd, KDBUS_CMD_HELLO, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_update(int fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(fd, KDBUS_CMD_UPDATE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_byebye(int conn_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_BYEBYE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_free(int conn_fd, struct kdbus_cmd_free *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_FREE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_conn_info(int conn_fd, struct kdbus_cmd_info *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_CONN_INFO, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_bus_creator_info(int conn_fd, struct kdbus_cmd_info *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_BUS_CREATOR_INFO, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_list(int fd, struct kdbus_cmd_list *cmd)
+{
+	int ret = ioctl(fd, KDBUS_CMD_LIST, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_send(int conn_fd, struct kdbus_cmd_send *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_SEND, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_recv(int conn_fd, struct kdbus_cmd_recv *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_RECV, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_name_acquire(int conn_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_NAME_ACQUIRE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_name_release(int conn_fd, struct kdbus_cmd *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_NAME_RELEASE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_match_add(int conn_fd, struct kdbus_cmd_match *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_MATCH_ADD, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_match_remove(int conn_fd, struct kdbus_cmd_match *cmd)
+{
+	int ret = ioctl(conn_fd, KDBUS_CMD_MATCH_REMOVE, cmd);
+	return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+#endif /* KDBUS_API_H */
diff -Nur linux-4.2/samples/kdbus/kdbus-workers.c linux-4.2-pck/samples/kdbus/kdbus-workers.c
--- linux-4.2/samples/kdbus/kdbus-workers.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/samples/kdbus/kdbus-workers.c	2015-09-08 00:48:22.241696581 -0300
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/*
+ * Example: Workers
+ * This program computes prime numbers based on the sieve of Eratosthenes. The
+ * master sets up a shared memory region and spawns workers which clear out the
+ * non-primes. The master reacts to keyboard input and to client-requests to
+ * control what each worker does. Note that this is in no way meant as an
+ * efficient way to compute primes; it serves only as an example of how a
+ * master/worker concept can be implemented with kdbus carrying the control messages.
+ *
+ * The main process is called the 'master'. It creates a new, private bus which
+ * will be used between the master and its workers to communicate. The master
+ * then spawns a fixed number of workers. Whenever a worker dies (detected via
+ * SIGCHLD), the master spawns a new worker. When done, the master waits for all
+ * workers to exit, prints a status report and exits itself.
+ *
+ * The master process does *not* keep track of its workers. Instead, this
+ * example implements a PULL model. That is, the master acquires a well-known
+ * name on the bus which each worker uses to request tasks from the master. If
+ * there are no more tasks, the master will return an empty task-list, which
+ * causes a worker to exit immediately.
+ *
+ * As tasks can be computationally expensive, we support cancellation. Whenever
+ * the master process is interrupted, it will drop its well-known name on the
+ * bus. This causes kdbus to broadcast a name-change notification. The workers
+ * check for broadcast messages regularly and will exit if they receive one.
+ *
+ * This example consists of 4 objects:
+ *  * master: The master object contains the context of the master process. This
+ *            process manages the prime-context, spawns workers and assigns
+ *            prime-ranges to each worker to compute.
+ *            The master does not do any prime computations itself.
+ *  * child:  The child object contains the context of a worker. It inherits the
+ *            prime context from its parent (the master) and then creates a new
+ *            bus context to request prime-ranges to compute.
+ *  * prime:  The "prime" object is used to abstract how we compute primes. When
+ *            allocated, it prepares a memory region to hold 1 bit for each
+ *            natural number up to a fixed maximum ('MAX_PRIMES').
+ *            The memory region is backed by a memfd which we share between
+ *            processes. Each worker gets assigned a range of natural
+ *            numbers and clears their multiples from the memory region. The
+ *            master process is responsible for distributing all natural numbers
+ *            up to the fixed maximum to its workers.
+ *  * bus:    The bus object is an abstraction of the kdbus API. It is pretty
+ *            straightforward and only manages the connection-fd plus the
+ *            memory-mapped pool in a single object.
+ *
+ * This example is in reverse order, which should make it easier to read
+ * top-down, but requires some forward-declarations. Just ignore those.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+
+/* glibc < 2.7 does not ship sys/signalfd.h */
+/* we require kernels with __NR_memfd_create */
+#if __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 7 && defined(__NR_memfd_create)
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/poll.h>
+#include <sys/signalfd.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include "kdbus-api.h"
+
+/* FORWARD DECLARATIONS */
+
+#define POOL_SIZE (16 * 1024 * 1024)
+#define MAX_PRIMES (2UL << 24)
+#define WORKER_COUNT (16)
+#define PRIME_STEPS (65536 * 4)
+
+static const char *arg_busname = "example-workers";
+static const char *arg_modname = "kdbus";
+static const char *arg_master = "org.freedesktop.master";
+
+static int err_assert(int r_errno, const char *msg, const char *func, int line,
+		      const char *file)
+{
+	r_errno = (r_errno != 0) ? -abs(r_errno) : -EFAULT;
+	if (r_errno < 0) {
+		errno = -r_errno;
+		fprintf(stderr, "ERR: %s: %m (%s:%d in %s)\n",
+			msg, func, line, file);
+	}
+	return r_errno;
+}
+
+#define err_r(_r, _msg) err_assert((_r), (_msg), __func__, __LINE__, __FILE__)
+#define err(_msg) err_r(errno, (_msg))
+
+struct prime;
+struct bus;
+struct master;
+struct child;
+
+struct prime {
+	int fd;
+	uint8_t *area;
+	size_t max;
+	size_t done;
+	size_t status;
+};
+
+static int prime_new(struct prime **out);
+static void prime_free(struct prime *p);
+static bool prime_done(struct prime *p);
+static void prime_consume(struct prime *p, size_t amount);
+static int prime_run(struct prime *p, struct bus *cancel, size_t number);
+static void prime_print(struct prime *p);
+
+struct bus {
+	int fd;
+	uint8_t *pool;
+};
+
+static int bus_open_connection(struct bus **out, uid_t uid, const char *name,
+			       uint64_t recv_flags);
+static void bus_close_connection(struct bus *b);
+static void bus_poool_free_slice(struct bus *b, uint64_t offset);
+static int bus_acquire_name(struct bus *b, const char *name);
+static int bus_install_name_loss_match(struct bus *b, const char *name);
+static int bus_poll(struct bus *b);
+static int bus_make(uid_t uid, const char *name);
+
+struct master {
+	size_t n_workers;
+	size_t max_workers;
+
+	int signal_fd;
+	int control_fd;
+
+	struct prime *prime;
+	struct bus *bus;
+};
+
+static int master_new(struct master **out);
+static void master_free(struct master *m);
+static int master_run(struct master *m);
+static int master_poll(struct master *m);
+static int master_handle_stdin(struct master *m);
+static int master_handle_signal(struct master *m);
+static int master_handle_bus(struct master *m);
+static int master_reply(struct master *m, const struct kdbus_msg *msg);
+static int master_waitpid(struct master *m);
+static int master_spawn(struct master *m);
+
+struct child {
+	struct bus *bus;
+	struct prime *prime;
+};
+
+static int child_new(struct child **out, struct prime *p);
+static void child_free(struct child *c);
+static int child_run(struct child *c);
+
+/* END OF FORWARD DECLARATIONS */
+
+/*
+ * This is the main entrypoint of this example. It is pretty straightforward. We
+ * create a master object, run the computation, print a status report and then
+ * exit. Nothing particularly interesting here, so let's look into the master
+ * object...
+ */
+int main(int argc, char **argv)
+{
+	struct master *m = NULL;
+	int r;
+
+	r = master_new(&m);
+	if (r < 0)
+		goto out;
+
+	r = master_run(m);
+	if (r < 0)
+		goto out;
+
+	if (0)
+		prime_print(m->prime);
+
+out:
+	master_free(m);
+	if (r < 0 && r != -EINTR)
+		fprintf(stderr, "failed\n");
+	else
+		fprintf(stderr, "done\n");
+	return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+/*
+ * ...this will allocate a new master context. It keeps track of the current
+ * number of children/workers that are running, manages a signalfd to track
+ * SIGCHLD, and creates a private kdbus bus. Afterwards, it opens its connection
+ * to the bus and acquires a well-known name (arg_master).
+ */
+static int master_new(struct master **out)
+{
+	struct master *m;
+	sigset_t smask;
+	int r;
+
+	m = calloc(1, sizeof(*m));
+	if (!m)
+		return err("cannot allocate master");
+
+	m->max_workers = WORKER_COUNT;
+	m->signal_fd = -1;
+	m->control_fd = -1;
+
+	/* Block SIGINT and SIGCHLD signals */
+	sigemptyset(&smask);
+	sigaddset(&smask, SIGINT);
+	sigaddset(&smask, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &smask, NULL);
+
+	m->signal_fd = signalfd(-1, &smask, SFD_CLOEXEC);
+	if (m->signal_fd < 0) {
+		r = err("cannot create signalfd");
+		goto error;
+	}
+
+	r = prime_new(&m->prime);
+	if (r < 0)
+		goto error;
+
+	m->control_fd = bus_make(getuid(), arg_busname);
+	if (m->control_fd < 0) {
+		r = m->control_fd;
+		goto error;
+	}
+
+	/*
+	 * Open a bus connection for the master, and require each received
+	 * message to have a metadata item of type KDBUS_ITEM_PIDS attached.
+	 * The current UID is needed to compute the name of the bus node to
+	 * connect to.
+	 */
+	r = bus_open_connection(&m->bus, getuid(),
+				arg_busname, KDBUS_ATTACH_PIDS);
+	if (r < 0)
+		goto error;
+
+	/*
+	 * Acquire a well-known name on the bus, so children can address
+	 * messages to the master using KDBUS_DST_ID_NAME as destination-ID
+	 * of messages.
+	 */
+	r = bus_acquire_name(m->bus, arg_master);
+	if (r < 0)
+		goto error;
+
+	*out = m;
+	return 0;
+
+error:
+	master_free(m);
+	return r;
+}
+
+/* pretty straightforward destructor of a master object */
+static void master_free(struct master *m)
+{
+	if (!m)
+		return;
+
+	bus_close_connection(m->bus);
+	if (m->control_fd >= 0)
+		close(m->control_fd);
+	prime_free(m->prime);
+	if (m->signal_fd >= 0)
+		close(m->signal_fd);
+	free(m);
+}
+
+static int master_run(struct master *m)
+{
+	int res, r = 0;
+
+	while (!prime_done(m->prime)) {
+		while (m->n_workers < m->max_workers) {
+			r = master_spawn(m);
+			if (r < 0)
+				break;
+		}
+
+		r = master_poll(m);
+		if (r < 0)
+			break;
+	}
+
+	if (r < 0) {
+		bus_close_connection(m->bus);
+		m->bus = NULL;
+	}
+
+	while (m->n_workers > 0) {
+		res = master_poll(m);
+		if (res < 0) {
+			if (m->bus) {
+				bus_close_connection(m->bus);
+				m->bus = NULL;
+			}
+			r = res;
+		}
+	}
+
+	return r == -EINTR ? 0 : r;
+}
+
+static int master_poll(struct master *m)
+{
+	struct pollfd fds[3] = {};
+	int r = 0, n = 0;
+
+	/*
+	 * Add stdin, the signalfd and the connection owner file descriptor to
+	 * the pollfd table, and handle incoming traffic on the latter in
+	 * master_handle_bus().
+	 */
+	fds[n].fd = STDIN_FILENO;
+	fds[n++].events = POLLIN;
+	fds[n].fd = m->signal_fd;
+	fds[n++].events = POLLIN;
+	if (m->bus) {
+		fds[n].fd = m->bus->fd;
+		fds[n++].events = POLLIN;
+	}
+
+	r = poll(fds, n, -1);
+	if (r < 0)
+		return err("poll() failed");
+
+	if (fds[0].revents & POLLIN)
+		r = master_handle_stdin(m);
+	else if (fds[0].revents)
+		r = err("ERR/HUP on stdin");
+	if (r < 0)
+		return r;
+
+	if (fds[1].revents & POLLIN)
+		r = master_handle_signal(m);
+	else if (fds[1].revents)
+		r = err("ERR/HUP on signalfd");
+	if (r < 0)
+		return r;
+
+	if (fds[2].revents & POLLIN)
+		r = master_handle_bus(m);
+	else if (fds[2].revents)
+		r = err("ERR/HUP on bus");
+
+	return r;
+}
+
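+/*
+ * Handle input on stdin: 'q' requests shutdown (reported as -EINTR),
+ * whitespace is ignored, and anything else is reported as invalid input.
+ */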
+static int master_handle_stdin(struct master *m)
+{
+	char buf[128];
+	ssize_t l;
+	int r = 0;
+
+	l = read(STDIN_FILENO, buf, sizeof(buf));
+	if (l < 0)
+		return err("cannot read stdin");
+	if (l == 0)
+		return err_r(-EINVAL, "EOF on stdin");
+
+	while (l-- > 0) {
+		switch (buf[l]) {
+		case 'q':
+			/* quit */
+			r = -EINTR;
+			break;
+		case '\n':
+		case ' ':
+			/* ignore */
+			break;
+		default:
+			if (isgraph(buf[l]))
+				fprintf(stderr, "invalid input '%c'\n", buf[l]);
+			else
+				fprintf(stderr, "invalid input 0x%x\n", buf[l]);
+			break;
+		}
+	}
+
+	return r;
+}
+
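+/*
+ * Handle the signalfd: SIGCHLD means a worker exited and needs to be
+ * reaped, SIGINT requests an orderly shutdown.
+ */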
+static int master_handle_signal(struct master *m)
+{
+	struct signalfd_siginfo val;
+	ssize_t l;
+
+	l = read(m->signal_fd, &val, sizeof(val));
+	if (l < 0)
+		return err("cannot read signalfd");
+	if (l != sizeof(val))
+		return err_r(-EINVAL, "invalid data from signalfd");
+
+	switch (val.ssi_signo) {
+	case SIGCHLD:
+		return master_waitpid(m);
+	case SIGINT:
+		return err_r(-EINTR, "interrupted");
+	default:
+		return err_r(-EINVAL, "caught invalid signal");
+	}
+}
+
+static int master_handle_bus(struct master *m)
+{
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+	const struct kdbus_msg *msg = NULL;
+	const struct kdbus_item *item;
+	const struct kdbus_vec *vec = NULL;
+	int r = 0;
+
+	/*
+	 * To receive a message, the KDBUS_CMD_RECV ioctl is used.
+	 * It takes an argument of type 'struct kdbus_cmd_recv', which
+	 * will contain information on the received message when the call
+	 * returns. See kdbus.message(7).
+	 */
+	r = kdbus_cmd_recv(m->bus->fd, &recv);
+	/*
+	 * EAGAIN is returned when there is no message waiting on this
+	 * connection. This is not an error - simply bail out.
+	 */
+	if (r == -EAGAIN)
+		return 0;
+	if (r < 0)
+		return err_r(r, "cannot receive message");
+
+	/*
+	 * Messages received by a connection are stored inside the connection's
+	 * pool, at an offset that has been returned in the 'recv' command
+	 * struct above. The value describes the relative offset from the
+	 * start address of the pool. A message is described with
+	 * 'struct kdbus_msg'. See kdbus.message(7).
+	 */
+	msg = (void *)(m->bus->pool + recv.msg.offset);
+
+	/*
+	 * A message describes its actual payload in an array of items.
+	 * KDBUS_FOREACH() is a simple iterator that walks such an array.
+	 * struct kdbus_msg has a field to denote its total size, which is
+	 * needed to determine the number of items in the array.
+	 */
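+	/*
+	 * For reference, KDBUS_FOREACH is a helper macro shared by the kdbus
+	 * samples. Its exact definition lives outside this file; a sketch of
+	 * what it is assumed to look like:
+	 *
+	 *   #define KDBUS_FOREACH(iter, first, _size)                       \
+	 *       for (iter = (first);                                        \
+	 *            ((uint8_t *)(iter) < (uint8_t *)(first) + (_size)) &&  \
+	 *              ((uint8_t *)(iter) >= (uint8_t *)(first));           \
+	 *            iter = (void *)((uint8_t *)(iter) +                    \
+	 *                            KDBUS_ALIGN8((iter)->size)))
+	 *
+	 * i.e. it starts at the first item and advances by each item's
+	 * 8-byte-aligned size until the given byte range is exhausted.
+	 */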
+	KDBUS_FOREACH(item, msg->items,
+		      msg->size - offsetof(struct kdbus_msg, items)) {
+		/*
+		 * An item of type PAYLOAD_OFF describes in-line memory
+		 * stored in the pool at a described offset. That offset is
+		 * relative to the start address of the message header.
+		 * This example program only expects one single item of that
+		 * type, remembers the struct kdbus_vec member of the item
+		 * when it sees it, and bails out if there is more than one
+		 * of them.
+		 */
+		if (item->type == KDBUS_ITEM_PAYLOAD_OFF) {
+			if (vec) {
+				r = err_r(-EEXIST,
+					  "message with multiple vecs");
+				break;
+			}
+			vec = &item->vec;
+			if (vec->size != 1) {
+				r = err_r(-EINVAL, "invalid message size");
+				break;
+			}
+
+		/*
+		 * MEMFDs are transported as items of type PAYLOAD_MEMFD.
+		 * If such an item is attached, a new file descriptor was
+		 * installed into the task when KDBUS_CMD_RECV was called, and
+		 * its number is stored in item->memfd.fd.
+		 * Implementers *must* handle this item type and close the
+		 * file descriptor when no longer needed in order to prevent
+		 * file descriptor exhaustion. This example program just bails
+		 * out with an error in this case, as memfds are not expected
+		 * in this context.
+		 */
+		} else if (item->type == KDBUS_ITEM_PAYLOAD_MEMFD) {
+			r = err_r(-EINVAL, "message with memfd");
+			break;
+		}
+	}
+	if (r < 0)
+		goto exit;
+	if (!vec) {
+		r = err_r(-EINVAL, "empty message");
+		goto exit;
+	}
+
+	switch (*((const uint8_t *)msg + vec->offset)) {
+	case 'r': {
+		r = master_reply(m, msg);
+		break;
+	}
+	default:
+		r = err_r(-EINVAL, "invalid message type");
+		break;
+	}
+
+exit:
+	/*
+	 * We are done with the memory slice that was given to us through
+	 * recv.msg.offset. Tell the kernel it can use it for other content
+	 * in the future. See kdbus.pool(7).
+	 */
+	bus_poool_free_slice(m->bus, recv.msg.offset);
+	return r;
+}
+
+static int master_reply(struct master *m, const struct kdbus_msg *msg)
+{
+	struct kdbus_cmd_send cmd;
+	struct kdbus_item *item;
+	struct kdbus_msg *reply;
+	size_t size, status, p[2];
+	int r;
+
+	/*
+	 * This function sends a message over kdbus. To do this, it uses the
+	 * KDBUS_CMD_SEND ioctl, which takes a command struct argument of type
+	 * 'struct kdbus_cmd_send'. This struct stores a pointer to the actual
+	 * message to send. See kdbus.message(7).
+	 */
+	p[0] = m->prime->done;
+	p[1] = prime_done(m->prime) ? 0 : PRIME_STEPS;
+
+	size = sizeof(*reply);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	/* Prepare the message to send */
+	reply = alloca(size);
+	memset(reply, 0, size);
+	reply->size = size;
+
+	/* Each message has a cookie that can be used to send replies */
+	reply->cookie = 1;
+
+	/* The payload_type is arbitrary, but it must be non-zero */
+	reply->payload_type = 0xdeadbeef;
+
+	/*
+	 * We are sending a reply. Let the kernel know the cookie of the
+	 * message we are replying to.
+	 */
+	reply->cookie_reply = msg->cookie;
+
+	/*
+	 * Messages can either be directed to a well-known name (stored as
+	 * string) or to a unique name (stored as number). This example does
+	 * the latter. If the message were directed to a well-known name
+	 * instead, the message's dst_id field would be set to
+	 * KDBUS_DST_ID_NAME, and the name would be attached in an item of type
+	 * KDBUS_ITEM_DST_NAME. See below for an example, and also refer to
+	 * kdbus.message(7).
+	 */
+	reply->dst_id = msg->src_id;
+
+	/* Our message has exactly one item to store its payload */
+	item = reply->items;
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)p;
+	item->vec.size = sizeof(p);
+
+	/*
+	 * Now prepare the command struct, and reference the message we want
+	 * to send.
+	 */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)reply;
+
+	/*
+	 * Finally, employ the command on the connection owner
+	 * file descriptor.
+	 */
+	r = kdbus_cmd_send(m->bus->fd, &cmd);
+	if (r < 0)
+		return err_r(r, "cannot send reply");
+
+	if (p[1]) {
+		prime_consume(m->prime, p[1]);
+		status = m->prime->done * 10000 / m->prime->max;
+		if (status != m->prime->status) {
+			m->prime->status = status;
+			fprintf(stderr, "status: %7.3lf%%\n",
+				(double)status / 100);
+		}
+	}
+
+	return 0;
+}
+
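+/*
+ * Reap all children that have exited so far and decrement the worker count
+ * accordingly. A child that died abnormally or returned a non-zero exit
+ * status is turned into a negative error code.
+ */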
+static int master_waitpid(struct master *m)
+{
+	pid_t pid;
+	int r;
+
+	while ((pid = waitpid(-1, &r, WNOHANG)) > 0) {
+		if (m->n_workers > 0)
+			--m->n_workers;
+		if (!WIFEXITED(r))
+			r = err_r(-EINVAL, "child died unexpectedly");
+		else if (WEXITSTATUS(r) != 0)
+			r = err_r(-WEXITSTATUS(r), "child failed");
+	}
+
+	return r;
+}
+
+static int master_spawn(struct master *m)
+{
+	struct child *c = NULL;
+	struct prime *p = NULL;
+	pid_t pid;
+	int r;
+
+	/* Spawn off one child and call child_run() inside it */
+
+	pid = fork();
+	if (pid < 0)
+		return err("cannot fork");
+	if (pid > 0) {
+		/* parent */
+		++m->n_workers;
+		return 0;
+	}
+
+	/* child */
+
+	p = m->prime;
+	m->prime = NULL;
+	master_free(m);
+
+	r = child_new(&c, p);
+	if (r < 0)
+		goto exit;
+
+	r = child_run(c);
+
+exit:
+	child_free(c);
+	exit(abs(r));
+}
+
+static int child_new(struct child **out, struct prime *p)
+{
+	struct child *c;
+	int r;
+
+	c = calloc(1, sizeof(*c));
+	if (!c)
+		return err("cannot allocate child");
+
+	c->prime = p;
+
+	/*
+	 * Open a connection to the bus and require each received message to
+	 * carry a list of the well-known names the sending connection currently
+	 * owns. The current UID is needed in order to determine the name of the
+	 * bus node to connect to.
+	 */
+	r = bus_open_connection(&c->bus, getuid(),
+				arg_busname, KDBUS_ATTACH_NAMES);
+	if (r < 0)
+		goto error;
+
+	/*
+	 * Install a kdbus match so the child's connection gets notified when
+	 * the master loses its well-known name.
+	 */
+	r = bus_install_name_loss_match(c->bus, arg_master);
+	if (r < 0)
+		goto error;
+
+	*out = c;
+	return 0;
+
+error:
+	child_free(c);
+	return r;
+}
+
+static void child_free(struct child *c)
+{
+	if (!c)
+		return;
+
+	bus_close_connection(c->bus);
+	prime_free(c->prime);
+	free(c);
+}
+
+static int child_run(struct child *c)
+{
+	struct kdbus_cmd_send cmd;
+	struct kdbus_item *item;
+	struct kdbus_vec *vec = NULL;
+	struct kdbus_msg *msg;
+	struct timespec spec;
+	size_t n, steps, size;
+	int r = 0;
+
+	/*
+	 * Let's send a message to the master and ask for work. To do this,
+	 * we use the KDBUS_CMD_SEND ioctl, which takes an argument of type
+	 * 'struct kdbus_cmd_send'. This struct stores a pointer to the actual
+	 * message to send. See kdbus.message(7).
+	 */
+	size = sizeof(*msg);
+	size += KDBUS_ITEM_SIZE(strlen(arg_master) + 1);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	msg = alloca(size);
+	memset(msg, 0, size);
+	msg->size = size;
+
+	/*
+	 * Tell the kernel that we expect a reply to this message. This means
+	 * that
+	 *
+	 * a) The remote peer will gain temporary permission to talk to us
+	 *    even if it would not be allowed to normally.
+	 *
+	 * b) A timeout value is required.
+	 *
+	 *    For asynchronous send commands, if no reply is received, we will
+	 *    get a kernel notification with an item of type
+	 *    KDBUS_ITEM_REPLY_TIMEOUT attached.
+	 *
+	 *    For synchronous send commands (which this example does), the
+	 *    ioctl will block until a reply is received or the timeout is
+	 *    exceeded.
+	 */
+	msg->flags = KDBUS_MSG_EXPECT_REPLY;
+
+	/* Set our cookie. Replies must use this cookie to send their reply. */
+	msg->cookie = 1;
+
+	/* The payload_type is arbitrary, but it must be non-zero */
+	msg->payload_type = 0xdeadbeef;
+
+	/*
+	 * We are sending our message to the current owner of a well-known
+	 * name. This makes an item of type KDBUS_ITEM_DST_NAME mandatory.
+	 */
+	msg->dst_id = KDBUS_DST_ID_NAME;
+
+	/*
+	 * Set the reply timeout to 5 seconds. Timeouts are always set in
+	 * absolute timestamps, based on CLOCK_MONOTONIC. See kdbus.message(7).
+	 */
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &spec);
+	msg->timeout_ns += (5 + spec.tv_sec) * 1000ULL * 1000ULL * 1000ULL;
+	msg->timeout_ns += spec.tv_nsec;
+
+	/*
+	 * Fill the appended items. First, set the well-known name of the
+	 * destination we want to talk to.
+	 */
+	item = msg->items;
+	item->type = KDBUS_ITEM_DST_NAME;
+	item->size = KDBUS_ITEM_HEADER_SIZE + strlen(arg_master) + 1;
+	strcpy(item->str, arg_master);
+
+	/*
+	 * The second item contains a vector to the memory we want to send. It
+	 * can reference content of any type. In our case, we're sending a one-byte
+	 * string only. The memory referenced by this item will be copied into
+	 * the pool of the receiver connection, and does not need to be valid
+	 * after the command is employed.
+	 */
+	item = KDBUS_ITEM_NEXT(item);
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)"r";
+	item->vec.size = 1;
+
+	/* Set up the command struct and reference the message we prepared */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	/*
+	 * The send command supports a mode in which it will block until a
+	 * reply to a message is received. This example uses that mode.
+	 * The pool offset to the received reply will be stored in the command
+	 * struct after the send command returns. See below.
+	 */
+	cmd.flags = KDBUS_SEND_SYNC_REPLY;
+
+	/*
+	 * Finally, employ the command on the connection owner
+	 * file descriptor.
+	 */
+	r = kdbus_cmd_send(c->bus->fd, &cmd);
+	if (r == -ESRCH || r == -EPIPE || r == -ECONNRESET)
+		return 0;
+	if (r < 0)
+		return err_r(r, "cannot send request to master");
+
+	/*
+	 * The command was sent with the KDBUS_SEND_SYNC_REPLY flag set,
+	 * and returned successfully, which means that cmd.reply.offset now
+	 * points to a message inside our connection's pool where the reply
+	 * is found. This is equivalent to receiving the reply with
+	 * KDBUS_CMD_RECV, but it doesn't require waiting for the reply with
+	 * poll() and also saves the ioctl to receive the message.
+	 */
+	msg = (void *)(c->bus->pool + cmd.reply.offset);
+
+	/*
+	 * A message describes its actual payload in an array of items.
+	 * KDBUS_FOREACH() is a simple iterator that walks such an array.
+	 * struct kdbus_msg has a field to denote its total size, which is
+	 * needed to determine the number of items in the array.
+	 */
+	KDBUS_FOREACH(item, msg->items,
+		      msg->size - offsetof(struct kdbus_msg, items)) {
+		/*
+		 * An item of type PAYLOAD_OFF describes in-line memory
+		 * stored in the pool at a described offset. That offset is
+		 * relative to the start address of the message header.
+		 * This example program only expects one single item of that
+		 * type, remembers the struct kdbus_vec member of the item
+		 * when it sees it, and bails out if there is more than one
+		 * of them.
+		 */
+		if (item->type == KDBUS_ITEM_PAYLOAD_OFF) {
+			if (vec) {
+				r = err_r(-EEXIST,
+					  "message with multiple vecs");
+				break;
+			}
+			vec = &item->vec;
+			if (vec->size != 2 * sizeof(size_t)) {
+				r = err_r(-EINVAL, "invalid message size");
+				break;
+			}
+		/*
+		 * MEMFDs are transported as items of type PAYLOAD_MEMFD.
+		 * If such an item is attached, a new file descriptor was
+		 * installed into the task when KDBUS_CMD_RECV was called, and
+		 * its number is stored in item->memfd.fd.
+		 * Implementers *must* handle this item type and close the
+		 * file descriptor when no longer needed in order to prevent
+		 * file descriptor exhaustion. This example program just bails
+		 * out with an error in this case, as memfds are not expected
+		 * in this context.
+		 */
+		} else if (item->type == KDBUS_ITEM_PAYLOAD_MEMFD) {
+			r = err_r(-EINVAL, "message with memfd");
+			break;
+		}
+	}
+	if (r < 0)
+		goto exit;
+	if (!vec) {
+		r = err_r(-EINVAL, "empty message");
+		goto exit;
+	}
+
+	n = ((size_t *)((const uint8_t *)msg + vec->offset))[0];
+	steps = ((size_t *)((const uint8_t *)msg + vec->offset))[1];
+
+	while (steps-- > 0) {
+		++n;
+		r = prime_run(c->prime, c->bus, n);
+		if (r < 0)
+			break;
+		r = bus_poll(c->bus);
+		if (r != 0) {
+			r = r < 0 ? r : -EINTR;
+			break;
+		}
+	}
+
+exit:
+	/*
+	 * We are done with the memory slice that was given to us through
+	 * cmd.reply.offset. Tell the kernel it can use it for other content
+	 * in the future. See kdbus.pool(7).
+	 */
+	bus_poool_free_slice(c->bus, cmd.reply.offset);
+	return r;
+}
+
+/*
+ * Prime Computation
+ *
+ */
+
+static int prime_new(struct prime **out)
+{
+	struct prime *p;
+	int r;
+
+	p = calloc(1, sizeof(*p));
+	if (!p)
+		return err("cannot allocate prime memory");
+
+	p->fd = -1;
+	p->area = MAP_FAILED;
+	p->max = MAX_PRIMES;
+
+	/*
+	 * Prepare and map a memfd to store the bit-fields for the number
+	 * ranges we want to perform the prime detection on.
+	 */
+	p->fd = syscall(__NR_memfd_create, "prime-area", MFD_CLOEXEC);
+	if (p->fd < 0) {
+		r = err("cannot create memfd");
+		goto error;
+	}
+
+	r = ftruncate(p->fd, p->max / 8 + 1);
+	if (r < 0) {
+		r = err("cannot ftruncate area");
+		goto error;
+	}
+
+	p->area = mmap(NULL, p->max / 8 + 1, PROT_READ | PROT_WRITE,
+		       MAP_SHARED, p->fd, 0);
+	if (p->area == MAP_FAILED) {
+		r = err("cannot mmap memfd");
+		goto error;
+	}
+
+	*out = p;
+	return 0;
+
+error:
+	prime_free(p);
+	return r;
+}
+
+static void prime_free(struct prime *p)
+{
+	if (!p)
+		return;
+
+	if (p->area != MAP_FAILED)
+		munmap(p->area, p->max / 8 + 1);
+	if (p->fd >= 0)
+		close(p->fd);
+	free(p);
+}
+
+static bool prime_done(struct prime *p)
+{
+	return p->done >= p->max;
+}
+
+static void prime_consume(struct prime *p, size_t amount)
+{
+	p->done += amount;
+}
+
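+/*
+ * One step of a simple Sieve of Eratosthenes: mark all multiples of 'number'
+ * (starting at number^2) as composite in the shared bit-field. Every 2^20
+ * marked entries, poll the bus connection so a pending message (e.g. the
+ * master going away) can cancel the computation.
+ */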
+static int prime_run(struct prime *p, struct bus *cancel, size_t number)
+{
+	size_t i, n = 0;
+	int r;
+
+	if (number < 2 || number > 65535)
+		return 0;
+
+	for (i = number * number;
+	     i < p->max && i > number;
+	     i += number) {
+		p->area[i / 8] |= 1 << (i % 8);
+
+		if (!(++n % (1 << 20))) {
+			r = bus_poll(cancel);
+			if (r != 0)
+				return r < 0 ? r : -EINTR;
+		}
+	}
+
+	return 0;
+}
+
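+/* Print all numbers not marked as composite by the sieve, 16 per line. */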
+static void prime_print(struct prime *p)
+{
+	size_t i, l = 0;
+
+	fprintf(stderr, "PRIMES:");
+	for (i = 0; i < p->max; ++i) {
+		if (!(p->area[i / 8] & (1 << (i % 8))))
+			fprintf(stderr, "%c%7zu", !(l++ % 16) ? '\n' : ' ', i);
+	}
+	fprintf(stderr, "\nEND\n");
+}
+
+static int bus_open_connection(struct bus **out, uid_t uid, const char *name,
+			       uint64_t recv_flags)
+{
+	struct kdbus_cmd_hello hello;
+	char path[128];
+	struct bus *b;
+	int r;
+
+	/*
+	 * The 'bus' object is our representation of a kdbus connection which
+	 * stores two details: the connection owner file descriptor, and the
+	 * mmap()ed memory of its associated pool. See kdbus.connection(7) and
+	 * kdbus.pool(7).
+	 */
+	b = calloc(1, sizeof(*b));
+	if (!b)
+		return err("cannot allocate bus memory");
+
+	b->fd = -1;
+	b->pool = MAP_FAILED;
+
+	/* Compute the name of the bus node to connect to. */
+	snprintf(path, sizeof(path), "/sys/fs/%s/%lu-%s/bus",
+		 arg_modname, (unsigned long)uid, name);
+	b->fd = open(path, O_RDWR | O_CLOEXEC);
+	if (b->fd < 0) {
+		r = err("cannot open bus");
+		goto error;
+	}
+
+	/*
+	 * To make a connection to the bus, the KDBUS_CMD_HELLO ioctl is used.
+	 * It takes an argument of type 'struct kdbus_cmd_hello'.
+	 */
+	memset(&hello, 0, sizeof(hello));
+	hello.size = sizeof(hello);
+
+	/*
+	 * Specify a mask of metadata attach flags, describing metadata items
+	 * that this new connection allows to be sent.
+	 */
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+
+	/*
+	 * Specify a mask of metadata attach flags, describing metadata items
+	 * that this new connection wants to receive along with each message.
+	 */
+	hello.attach_flags_recv = recv_flags;
+
+	/*
+	 * A connection may choose the size of its pool, but the number has to
+	 * comply with two rules: a) it must be greater than 0, and b) it must
+	 * be a multiple of PAGE_SIZE. See kdbus.pool(7).
+	 */
+	hello.pool_size = POOL_SIZE;
+
+	/*
+	 * Now employ the command on the file descriptor opened above.
+	 * This command will turn the file descriptor into a connection-owner
+	 * file descriptor that controls the life-time of the connection; once
+	 * it's closed, the connection is shut down.
+	 */
+	r = kdbus_cmd_hello(b->fd, &hello);
+	if (r < 0) {
+		err_r(r, "HELLO failed");
+		goto error;
+	}
+
+	bus_poool_free_slice(b, hello.offset);
+
+	/*
+	 * Map the pool of the connection. Its size has been set in the
+	 * command struct above. See kdbus.pool(7).
+	 */
+	b->pool = mmap(NULL, POOL_SIZE, PROT_READ, MAP_SHARED, b->fd, 0);
+	if (b->pool == MAP_FAILED) {
+		r = err("cannot mmap pool");
+		goto error;
+	}
+
+	*out = b;
+	return 0;
+
+error:
+	bus_close_connection(b);
+	return r;
+}
+
+static void bus_close_connection(struct bus *b)
+{
+	if (!b)
+		return;
+
+	/*
+	 * A bus connection is closed by simply calling close() on the
+	 * connection owner file descriptor. The unique name and all owned
+	 * well-known names of the connection will disappear.
+	 * See kdbus.connection(7).
+	 */
+	if (b->pool != MAP_FAILED)
+		munmap(b->pool, POOL_SIZE);
+	if (b->fd >= 0)
+		close(b->fd);
+	free(b);
+}
+
+static void bus_poool_free_slice(struct bus *b, uint64_t offset)
+{
+	struct kdbus_cmd_free cmd = {
+		.size = sizeof(cmd),
+		.offset = offset,
+	};
+	int r;
+
+	/*
+	 * Once we're done with a piece of pool memory that was returned
+	 * by a command, we have to call the KDBUS_CMD_FREE ioctl on it so it
+	 * can be reused. The command takes an argument of type
+	 * 'struct kdbus_cmd_free', in which the pool offset of the slice to
+	 * free is stored. The ioctl is employed on the connection owner
+	 * file descriptor. See kdbus.pool(7).
+	 */
+	r = kdbus_cmd_free(b->fd, &cmd);
+	if (r < 0)
+		err_r(r, "cannot free pool slice");
+}
+
+static int bus_acquire_name(struct bus *b, const char *name)
+{
+	struct kdbus_item *item;
+	struct kdbus_cmd *cmd;
+	size_t size;
+	int r;
+
+	/*
+	 * This function acquires a well-known name on the bus through the
+	 * KDBUS_CMD_NAME_ACQUIRE ioctl. This ioctl takes an argument of type
+	 * 'struct kdbus_cmd', which is assembled below. See kdbus.name(7).
+	 */
+	size = sizeof(*cmd);
+	size += KDBUS_ITEM_SIZE(strlen(name) + 1);
+
+	cmd = alloca(size);
+	memset(cmd, 0, size);
+	cmd->size = size;
+
+	/*
+	 * The command requires an item of type KDBUS_ITEM_NAME, and its
+	 * content must be a valid bus name.
+	 */
+	item = cmd->items;
+	item->type = KDBUS_ITEM_NAME;
+	item->size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+	strcpy(item->str, name);
+
+	/*
+	 * Employ the command on the connection owner file descriptor.
+	 */
+	r = kdbus_cmd_name_acquire(b->fd, cmd);
+	if (r < 0)
+		return err_r(r, "cannot acquire name");
+
+	return 0;
+}
+
+static int bus_install_name_loss_match(struct bus *b, const char *name)
+{
+	struct kdbus_cmd_match *match;
+	struct kdbus_item *item;
+	size_t size;
+	int r;
+
+	/*
+	 * In order to install a match for signal messages, we have to
+	 * assemble a 'struct kdbus_cmd_match' and use it along with the
+	 * KDBUS_CMD_MATCH_ADD ioctl. See kdbus.match(7).
+	 */
+	size = sizeof(*match);
+	size += KDBUS_ITEM_SIZE(sizeof(item->name_change) + strlen(name) + 1);
+
+	match = alloca(size);
+	memset(match, 0, size);
+	match->size = size;
+
+	/*
+	 * A match is composed of one or more 'rules', each of which describes
+	 * a mandatory detail of the message. All rules of a match must be
+	 * satisfied in order to make a message pass.
+	 */
+	item = match->items;
+
+	/*
+	 * In this case, we're interested in notifications that inform us
+	 * about a well-known name being removed from the bus.
+	 */
+	item->type = KDBUS_ITEM_NAME_REMOVE;
+	item->size = KDBUS_ITEM_HEADER_SIZE +
+			sizeof(item->name_change) + strlen(name) + 1;
+
+	/*
+	 * We could limit the match further and require a specific unique-ID
+	 * to be the new or the old owner of the name. In this case, however,
+	 * we don't, and allow 'any' id.
+	 */
+	item->name_change.old_id.id = KDBUS_MATCH_ID_ANY;
+	item->name_change.new_id.id = KDBUS_MATCH_ID_ANY;
+
+	/* Copy in the well-known name we're interested in */
+	strcpy(item->name_change.name, name);
+
+	/*
+	 * Add the match through the KDBUS_CMD_MATCH_ADD ioctl, employed on
+	 * the connection owner fd.
+	 */
+	r = kdbus_cmd_match_add(b->fd, match);
+	if (r < 0)
+		return err_r(r, "cannot add match");
+
+	return 0;
+}
+
+static int bus_poll(struct bus *b)
+{
+	struct pollfd fds[1] = {};
+	int r;
+
+	/*
+	 * A connection endpoint supports poll() and will wake up the
+	 * task with POLLIN set once a message has arrived.
+	 */
+	fds[0].fd = b->fd;
+	fds[0].events = POLLIN;
+	r = poll(fds, sizeof(fds) / sizeof(*fds), 0);
+	if (r < 0)
+		return err("cannot poll bus");
+	return !!(fds[0].revents & POLLIN);
+}
+
+static int bus_make(uid_t uid, const char *name)
+{
+	struct kdbus_item *item;
+	struct kdbus_cmd *make;
+	char path[128], busname[128];
+	size_t size;
+	int r, fd;
+
+	/*
+	 * Compute the full path to the 'control' node. 'arg_modname' may be
+	 * set to a different value than 'kdbus' for development purposes.
+	 * The 'control' node is the primary entry point to kdbus that must be
+	 * used in order to create a bus. See kdbus(7) and kdbus.bus(7).
+	 */
+	snprintf(path, sizeof(path), "/sys/fs/%s/control", arg_modname);
+
+	/*
+	 * Compute the bus name. A valid bus name must always be prefixed with
+	 * the EUID of the currently running process in order to avoid name
+	 * conflicts. See kdbus.bus(7).
+	 */
+	snprintf(busname, sizeof(busname), "%lu-%s", (unsigned long)uid, name);
+
+	fd = open(path, O_RDWR | O_CLOEXEC);
+	if (fd < 0)
+		return err("cannot open control file");
+
+	/*
+	 * The KDBUS_CMD_BUS_MAKE ioctl takes an argument of type
+	 * 'struct kdbus_cmd', and expects at least two items attached to
+	 * it: one to describe the bloom parameters to be propagated to
+	 * connections of the bus, and the name of the bus that was computed
+	 * above. Assemble this struct now, and fill it with values.
+	 */
+	size = sizeof(*make);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_parameter));
+	size += KDBUS_ITEM_SIZE(strlen(busname) + 1);
+
+	make = alloca(size);
+	memset(make, 0, size);
+	make->size = size;
+
+	/*
+	 * Each item has a 'type' and 'size' field, and must be stored at an
+	 * 8-byte aligned address. The KDBUS_ITEM_NEXT macro is used to advance
+	 * the pointer. See kdbus.item(7) for more details.
+	 */
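+	/*
+	 * For reference, the item helpers used throughout this file are
+	 * assumed to be defined roughly as follows (sketch only; the real
+	 * definitions come from the kdbus UAPI/sample headers):
+	 *
+	 *   #define KDBUS_ALIGN8(val)       (((val) + 7) & ~7ULL)
+	 *   #define KDBUS_ITEM_HEADER_SIZE  offsetof(struct kdbus_item, data)
+	 *   #define KDBUS_ITEM_SIZE(payload) \
+	 *           KDBUS_ALIGN8(KDBUS_ITEM_HEADER_SIZE + (payload))
+	 *   #define KDBUS_ITEM_NEXT(item)   (typeof(item)) \
+	 *           ((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+	 */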
+	item = make->items;
+	item->type = KDBUS_ITEM_BLOOM_PARAMETER;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(item->bloom_parameter);
+	item->bloom_parameter.size = 8;
+	item->bloom_parameter.n_hash = 1;
+
+	/* The name of the new bus is stored in the next item. */
+	item = KDBUS_ITEM_NEXT(item);
+	item->type = KDBUS_ITEM_MAKE_NAME;
+	item->size = KDBUS_ITEM_HEADER_SIZE + strlen(busname) + 1;
+	strcpy(item->str, busname);
+
+	/*
+	 * Now create the bus via the KDBUS_CMD_BUS_MAKE ioctl and return the
+	 * fd that was used back to the caller of this function. This fd is now
+	 * called a 'bus owner file descriptor', and it controls the life-time
+	 * of the newly created bus; once the file descriptor is closed, the
+	 * bus goes away, and all connections are shut down. See kdbus.bus(7).
+	 */
+	r = kdbus_cmd_bus_make(fd, make);
+	if (r < 0) {
+		err_r(r, "cannot make bus");
+		close(fd);
+		return r;
+	}
+
+	return fd;
+}
+
+#else
+
+#warning "Skipping compilation due to unsupported libc version"
+
+int main(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Compilation of %s was skipped due to unsupported libc.\n",
+		argv[0]);
+
+	return EXIT_FAILURE;
+}
+
+#endif /* libc sanity check */
diff -Nur linux-4.2/scripts/basic/.fixdep.cmd linux-4.2-pck/scripts/basic/.fixdep.cmd
--- linux-4.2/scripts/basic/.fixdep.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/basic/.fixdep.cmd	2015-09-08 01:26:14.022509469 -0300
@@ -0,0 +1,90 @@
+cmd_scripts/basic/fixdep := gcc -Wp,-MD,scripts/basic/.fixdep.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89     -o scripts/basic/fixdep scripts/basic/fixdep.c  
+
+source_scripts/basic/fixdep := scripts/basic/fixdep.c
+
+deps_scripts/basic/fixdep := \
+    $(wildcard include/config/his/driver.h) \
+    $(wildcard include/config/my/option.h) \
+    $(wildcard include/config/.h) \
+    $(wildcard include/config/foo.h) \
+    $(wildcard include/config/boom.h) \
+  /usr/include/stdc-predef.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/sys/stat.h \
+  /usr/include/bits/stat.h \
+  /usr/include/sys/mman.h \
+  /usr/include/bits/mman.h \
+  /usr/include/bits/mman-linux.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/string.h \
+  /usr/include/xlocale.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/limits.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/syslimits.h \
+  /usr/include/limits.h \
+  /usr/include/bits/posix1_lim.h \
+  /usr/include/bits/local_lim.h \
+  /usr/include/linux/limits.h \
+  /usr/include/bits/posix2_lim.h \
+  /usr/include/ctype.h \
+  /usr/include/arpa/inet.h \
+  /usr/include/netinet/in.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdint.h \
+  /usr/include/stdint.h \
+  /usr/include/bits/wchar.h \
+  /usr/include/sys/socket.h \
+  /usr/include/sys/uio.h \
+  /usr/include/bits/uio.h \
+  /usr/include/bits/socket.h \
+  /usr/include/bits/socket_type.h \
+  /usr/include/bits/sockaddr.h \
+  /usr/include/asm/socket.h \
+  /usr/include/asm-generic/socket.h \
+  /usr/include/asm/sockios.h \
+  /usr/include/asm-generic/sockios.h \
+  /usr/include/bits/in.h \
+
+scripts/basic/fixdep: $(deps_scripts/basic/fixdep)
+
+$(deps_scripts/basic/fixdep):
Binary files linux-4.2/scripts/basic/fixdep and linux-4.2-pck/scripts/basic/fixdep differ
diff -Nur linux-4.2/scripts/kconfig/.mconf.cmd linux-4.2-pck/scripts/kconfig/.mconf.cmd
--- linux-4.2/scripts/kconfig/.mconf.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/.mconf.cmd	2015-09-08 01:26:19.482241328 -0300
@@ -0,0 +1 @@
+cmd_scripts/kconfig/mconf := gcc  -o scripts/kconfig/mconf scripts/kconfig/mconf.o scripts/kconfig/zconf.tab.o scripts/kconfig/lxdialog/checklist.o scripts/kconfig/lxdialog/util.o scripts/kconfig/lxdialog/inputbox.o scripts/kconfig/lxdialog/textbox.o scripts/kconfig/lxdialog/yesno.o scripts/kconfig/lxdialog/menubox.o  -lncursesw 
diff -Nur linux-4.2/scripts/kconfig/.mconf.o.cmd linux-4.2-pck/scripts/kconfig/.mconf.o.cmd
--- linux-4.2/scripts/kconfig/.mconf.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/.mconf.o.cmd	2015-09-08 01:26:14.832469692 -0300
@@ -0,0 +1,100 @@
+cmd_scripts/kconfig/mconf.o := gcc -Wp,-MD,scripts/kconfig/.mconf.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/mconf.o scripts/kconfig/mconf.c
+
+source_scripts/kconfig/mconf.o := scripts/kconfig/mconf.c
+
+deps_scripts/kconfig/mconf.o := \
+    $(wildcard include/config/mode.h) \
+    $(wildcard include/config/color.h) \
+    $(wildcard include/config/.h) \
+  /usr/include/stdc-predef.h \
+  /usr/include/ctype.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/xlocale.h \
+  /usr/include/errno.h \
+  /usr/include/bits/errno.h \
+  /usr/include/linux/errno.h \
+  /usr/include/asm/errno.h \
+  /usr/include/asm-generic/errno.h \
+  /usr/include/asm-generic/errno-base.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/time.h \
+  /usr/include/bits/stat.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/limits.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/syslimits.h \
+  /usr/include/limits.h \
+  /usr/include/bits/posix1_lim.h \
+  /usr/include/bits/local_lim.h \
+  /usr/include/linux/limits.h \
+  /usr/include/bits/posix2_lim.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/stdlib.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/sys/types.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/include/signal.h \
+  /usr/include/bits/signum.h \
+  /usr/include/bits/siginfo.h \
+  /usr/include/bits/sigaction.h \
+  /usr/include/bits/sigcontext.h \
+  /usr/include/bits/sigstack.h \
+  /usr/include/sys/ucontext.h \
+  /usr/include/bits/sigthread.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  scripts/kconfig/lkc.h \
+    $(wildcard include/config/prefix.h) \
+    $(wildcard include/config/list.h) \
+    $(wildcard include/config/y.h) \
+  scripts/kconfig/expr.h \
+    $(wildcard include/config/config.h) \
+  /usr/include/assert.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  scripts/kconfig/list.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  scripts/kconfig/lkc_proto.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/mconf.o: $(deps_scripts/kconfig/mconf.o)
+
+$(deps_scripts/kconfig/mconf.o):
diff -Nur linux-4.2/scripts/kconfig/.zconf.tab.o.cmd linux-4.2-pck/scripts/kconfig/.zconf.tab.o.cmd
--- linux-4.2/scripts/kconfig/.zconf.tab.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/.zconf.tab.o.cmd	2015-09-08 01:26:16.845704149 -0300
@@ -0,0 +1,101 @@
+cmd_scripts/kconfig/zconf.tab.o := gcc -Wp,-MD,scripts/kconfig/.zconf.tab.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE  -Iscripts/kconfig -c -o scripts/kconfig/zconf.tab.o scripts/kconfig/zconf.tab.c
+
+source_scripts/kconfig/zconf.tab.o := scripts/kconfig/zconf.tab.c
+
+deps_scripts/kconfig/zconf.tab.o := \
+  /usr/include/stdc-predef.h \
+  /usr/include/ctype.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/xlocale.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/stdio.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/sys/types.h \
+  /usr/include/time.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  scripts/kconfig/lkc.h \
+    $(wildcard include/config/.h) \
+    $(wildcard include/config/prefix.h) \
+    $(wildcard include/config/list.h) \
+    $(wildcard include/config/y.h) \
+  scripts/kconfig/expr.h \
+    $(wildcard include/config/config.h) \
+  /usr/include/assert.h \
+  scripts/kconfig/list.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  scripts/kconfig/lkc_proto.h \
+  scripts/kconfig/zconf.hash.c \
+  scripts/kconfig/zconf.lex.c \
+  /usr/include/errno.h \
+  /usr/include/bits/errno.h \
+  /usr/include/linux/errno.h \
+  /usr/include/asm/errno.h \
+  /usr/include/asm-generic/errno.h \
+  /usr/include/asm-generic/errno-base.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/limits.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include-fixed/syslimits.h \
+  /usr/include/limits.h \
+  /usr/include/bits/posix1_lim.h \
+  /usr/include/bits/local_lim.h \
+  /usr/include/linux/limits.h \
+  /usr/include/bits/posix2_lim.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  scripts/kconfig/util.c \
+  scripts/kconfig/confdata.c \
+    $(wildcard include/config/autoconfig.h) \
+    $(wildcard include/config/overwriteconfig.h) \
+    $(wildcard include/config/autoheader.h) \
+    $(wildcard include/config/tristate.h) \
+    $(wildcard include/config/probability.h) \
+  /usr/include/sys/stat.h \
+  /usr/include/bits/stat.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  scripts/kconfig/expr.c \
+  scripts/kconfig/symbol.c \
+  /usr/include/regex.h \
+  /usr/include/sys/utsname.h \
+  /usr/include/bits/utsname.h \
+  scripts/kconfig/menu.c \
+
+scripts/kconfig/zconf.tab.o: $(deps_scripts/kconfig/zconf.tab.o)
+
+$(deps_scripts/kconfig/zconf.tab.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.checklist.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.checklist.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.checklist.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.checklist.o.cmd	2015-09-08 01:26:17.362345442 -0300
@@ -0,0 +1,67 @@
+cmd_scripts/kconfig/lxdialog/checklist.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.checklist.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/checklist.o scripts/kconfig/lxdialog/checklist.c
+
+source_scripts/kconfig/lxdialog/checklist.o := scripts/kconfig/lxdialog/checklist.c
+
+deps_scripts/kconfig/lxdialog/checklist.o := \
+  /usr/include/stdc-predef.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/checklist.o: $(deps_scripts/kconfig/lxdialog/checklist.o)
+
+$(deps_scripts/kconfig/lxdialog/checklist.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.inputbox.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.inputbox.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.inputbox.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.inputbox.o.cmd	2015-09-08 01:26:18.375629008 -0300
@@ -0,0 +1,67 @@
+cmd_scripts/kconfig/lxdialog/inputbox.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.inputbox.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/inputbox.o scripts/kconfig/lxdialog/inputbox.c
+
+source_scripts/kconfig/lxdialog/inputbox.o := scripts/kconfig/lxdialog/inputbox.c
+
+deps_scripts/kconfig/lxdialog/inputbox.o := \
+  /usr/include/stdc-predef.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/inputbox.o: $(deps_scripts/kconfig/lxdialog/inputbox.o)
+
+$(deps_scripts/kconfig/lxdialog/inputbox.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.menubox.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.menubox.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.menubox.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.menubox.o.cmd	2015-09-08 01:26:19.415577936 -0300
@@ -0,0 +1,67 @@
+cmd_scripts/kconfig/lxdialog/menubox.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.menubox.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/menubox.o scripts/kconfig/lxdialog/menubox.c
+
+source_scripts/kconfig/lxdialog/menubox.o := scripts/kconfig/lxdialog/menubox.c
+
+deps_scripts/kconfig/lxdialog/menubox.o := \
+  /usr/include/stdc-predef.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/menubox.o: $(deps_scripts/kconfig/lxdialog/menubox.o)
+
+$(deps_scripts/kconfig/lxdialog/menubox.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.textbox.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.textbox.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.textbox.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.textbox.o.cmd	2015-09-08 01:26:18.732278161 -0300
@@ -0,0 +1,67 @@
+cmd_scripts/kconfig/lxdialog/textbox.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.textbox.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/textbox.o scripts/kconfig/lxdialog/textbox.c
+
+source_scripts/kconfig/lxdialog/textbox.o := scripts/kconfig/lxdialog/textbox.c
+
+deps_scripts/kconfig/lxdialog/textbox.o := \
+  /usr/include/stdc-predef.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/textbox.o: $(deps_scripts/kconfig/lxdialog/textbox.o)
+
+$(deps_scripts/kconfig/lxdialog/textbox.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.util.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.util.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.util.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.util.o.cmd	2015-09-08 01:26:17.828989191 -0300
@@ -0,0 +1,68 @@
+cmd_scripts/kconfig/lxdialog/util.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.util.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/util.o scripts/kconfig/lxdialog/util.c
+
+source_scripts/kconfig/lxdialog/util.o := scripts/kconfig/lxdialog/util.c
+
+deps_scripts/kconfig/lxdialog/util.o := \
+    $(wildcard include/config/color.h) \
+  /usr/include/stdc-predef.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/util.o: $(deps_scripts/kconfig/lxdialog/util.o)
+
+$(deps_scripts/kconfig/lxdialog/util.o):
diff -Nur linux-4.2/scripts/kconfig/lxdialog/.yesno.o.cmd linux-4.2-pck/scripts/kconfig/lxdialog/.yesno.o.cmd
--- linux-4.2/scripts/kconfig/lxdialog/.yesno.o.cmd	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/lxdialog/.yesno.o.cmd	2015-09-08 01:26:19.025597088 -0300
@@ -0,0 +1,67 @@
+cmd_scripts/kconfig/lxdialog/yesno.o := gcc -Wp,-MD,scripts/kconfig/lxdialog/.yesno.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89    -DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1 -DLOCALE   -c -o scripts/kconfig/lxdialog/yesno.o scripts/kconfig/lxdialog/yesno.c
+
+source_scripts/kconfig/lxdialog/yesno.o := scripts/kconfig/lxdialog/yesno.c
+
+deps_scripts/kconfig/lxdialog/yesno.o := \
+  /usr/include/stdc-predef.h \
+  scripts/kconfig/lxdialog/dialog.h \
+  /usr/include/sys/types.h \
+  /usr/include/features.h \
+  /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h \
+  /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/include/bits/types.h \
+  /usr/include/bits/typesizes.h \
+  /usr/include/time.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stddef.h \
+  /usr/include/endian.h \
+  /usr/include/bits/endian.h \
+  /usr/include/bits/byteswap.h \
+  /usr/include/bits/byteswap-16.h \
+  /usr/include/sys/select.h \
+  /usr/include/bits/select.h \
+  /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h \
+  /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h \
+  /usr/include/fcntl.h \
+  /usr/include/bits/fcntl.h \
+  /usr/include/bits/fcntl-linux.h \
+  /usr/include/bits/stat.h \
+  /usr/include/unistd.h \
+  /usr/include/bits/posix_opt.h \
+  /usr/include/bits/environments.h \
+  /usr/include/bits/confname.h \
+  /usr/include/getopt.h \
+  /usr/include/ctype.h \
+  /usr/include/xlocale.h \
+  /usr/include/stdlib.h \
+  /usr/include/bits/waitflags.h \
+  /usr/include/bits/waitstatus.h \
+  /usr/include/alloca.h \
+  /usr/include/bits/stdlib-bsearch.h \
+  /usr/include/bits/stdlib-float.h \
+  /usr/include/string.h \
+  /usr/include/bits/string.h \
+  /usr/include/bits/string2.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdbool.h \
+  /usr/include/libintl.h \
+  /usr/include/locale.h \
+  /usr/include/bits/locale.h \
+  /usr/include/curses.h \
+  /usr/include/ncurses_dll.h \
+  /usr/include/stdio.h \
+  /usr/include/libio.h \
+  /usr/include/_G_config.h \
+  /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-unknown-linux-gnu/5.2.0/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h \
+  /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h \
+  /usr/include/unctrl.h \
+  /usr/include/curses.h \
+
+scripts/kconfig/lxdialog/yesno.o: $(deps_scripts/kconfig/lxdialog/yesno.o)
+
+$(deps_scripts/kconfig/lxdialog/yesno.o):
Binary files linux-4.2/scripts/kconfig/lxdialog/checklist.o and linux-4.2-pck/scripts/kconfig/lxdialog/checklist.o differ
Binary files linux-4.2/scripts/kconfig/lxdialog/inputbox.o and linux-4.2-pck/scripts/kconfig/lxdialog/inputbox.o differ
Binary files linux-4.2/scripts/kconfig/lxdialog/menubox.o and linux-4.2-pck/scripts/kconfig/lxdialog/menubox.o differ
Binary files linux-4.2/scripts/kconfig/lxdialog/textbox.o and linux-4.2-pck/scripts/kconfig/lxdialog/textbox.o differ
Binary files linux-4.2/scripts/kconfig/lxdialog/util.o and linux-4.2-pck/scripts/kconfig/lxdialog/util.o differ
Binary files linux-4.2/scripts/kconfig/lxdialog/yesno.o and linux-4.2-pck/scripts/kconfig/lxdialog/yesno.o differ
Binary files linux-4.2/scripts/kconfig/mconf and linux-4.2-pck/scripts/kconfig/mconf differ
Binary files linux-4.2/scripts/kconfig/mconf.o and linux-4.2-pck/scripts/kconfig/mconf.o differ
diff -Nur linux-4.2/scripts/kconfig/zconf.hash.c linux-4.2-pck/scripts/kconfig/zconf.hash.c
--- linux-4.2/scripts/kconfig/zconf.hash.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/zconf.hash.c	2015-09-08 01:26:14.849135538 -0300
@@ -0,0 +1,289 @@
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -t --output-file scripts/kconfig/zconf.hash.c_shipped -a -C -E -g -k '1,3,$' -p -t scripts/kconfig/zconf.gperf  */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 10 "scripts/kconfig/zconf.gperf"
+struct kconf_id;
+
+static const struct kconf_id *kconf_id_lookup(register const char *str, register unsigned int len);
+/* maximum key range = 71, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+kconf_id_hash (register const char *str, register unsigned int len)
+{
+  static const unsigned char asso_values[] =
+    {
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73,  5, 25, 25,
+       0,  0,  0,  5,  0,  0, 73, 73,  5,  0,
+      10,  5, 45, 73, 20, 20,  0, 15, 15, 73,
+      20,  5, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+      73, 73, 73, 73, 73, 73
+    };
+  register int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[2]];
+      /*FALLTHROUGH*/
+      case 2:
+      case 1:
+        hval += asso_values[(unsigned char)str[0]];
+        break;
+    }
+  return hval + asso_values[(unsigned char)str[len - 1]];
+}
+
+struct kconf_id_strings_t
+  {
+    char kconf_id_strings_str2[sizeof("if")];
+    char kconf_id_strings_str3[sizeof("int")];
+    char kconf_id_strings_str5[sizeof("endif")];
+    char kconf_id_strings_str7[sizeof("default")];
+    char kconf_id_strings_str8[sizeof("tristate")];
+    char kconf_id_strings_str9[sizeof("endchoice")];
+    char kconf_id_strings_str12[sizeof("def_tristate")];
+    char kconf_id_strings_str13[sizeof("def_bool")];
+    char kconf_id_strings_str14[sizeof("defconfig_list")];
+    char kconf_id_strings_str17[sizeof("on")];
+    char kconf_id_strings_str18[sizeof("optional")];
+    char kconf_id_strings_str21[sizeof("option")];
+    char kconf_id_strings_str22[sizeof("endmenu")];
+    char kconf_id_strings_str23[sizeof("mainmenu")];
+    char kconf_id_strings_str25[sizeof("menuconfig")];
+    char kconf_id_strings_str27[sizeof("modules")];
+    char kconf_id_strings_str28[sizeof("allnoconfig_y")];
+    char kconf_id_strings_str29[sizeof("menu")];
+    char kconf_id_strings_str31[sizeof("select")];
+    char kconf_id_strings_str32[sizeof("comment")];
+    char kconf_id_strings_str33[sizeof("env")];
+    char kconf_id_strings_str35[sizeof("range")];
+    char kconf_id_strings_str36[sizeof("choice")];
+    char kconf_id_strings_str39[sizeof("bool")];
+    char kconf_id_strings_str41[sizeof("source")];
+    char kconf_id_strings_str42[sizeof("visible")];
+    char kconf_id_strings_str43[sizeof("hex")];
+    char kconf_id_strings_str46[sizeof("config")];
+    char kconf_id_strings_str47[sizeof("boolean")];
+    char kconf_id_strings_str51[sizeof("string")];
+    char kconf_id_strings_str54[sizeof("help")];
+    char kconf_id_strings_str56[sizeof("prompt")];
+    char kconf_id_strings_str72[sizeof("depends")];
+  };
+static const struct kconf_id_strings_t kconf_id_strings_contents =
+  {
+    "if",
+    "int",
+    "endif",
+    "default",
+    "tristate",
+    "endchoice",
+    "def_tristate",
+    "def_bool",
+    "defconfig_list",
+    "on",
+    "optional",
+    "option",
+    "endmenu",
+    "mainmenu",
+    "menuconfig",
+    "modules",
+    "allnoconfig_y",
+    "menu",
+    "select",
+    "comment",
+    "env",
+    "range",
+    "choice",
+    "bool",
+    "source",
+    "visible",
+    "hex",
+    "config",
+    "boolean",
+    "string",
+    "help",
+    "prompt",
+    "depends"
+  };
+#define kconf_id_strings ((const char *) &kconf_id_strings_contents)
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const struct kconf_id *
+kconf_id_lookup (register const char *str, register unsigned int len)
+{
+  enum
+    {
+      TOTAL_KEYWORDS = 33,
+      MIN_WORD_LENGTH = 2,
+      MAX_WORD_LENGTH = 14,
+      MIN_HASH_VALUE = 2,
+      MAX_HASH_VALUE = 72
+    };
+
+  static const struct kconf_id wordlist[] =
+    {
+      {-1}, {-1},
+#line 25 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str2,		T_IF,		TF_COMMAND|TF_PARAM},
+#line 36 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str3,		T_TYPE,		TF_COMMAND, S_INT},
+      {-1},
+#line 26 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str5,		T_ENDIF,	TF_COMMAND},
+      {-1},
+#line 29 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str7,	T_DEFAULT,	TF_COMMAND, S_UNKNOWN},
+#line 31 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str8,	T_TYPE,		TF_COMMAND, S_TRISTATE},
+#line 20 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str9,	T_ENDCHOICE,	TF_COMMAND},
+      {-1}, {-1},
+#line 32 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str12,	T_DEFAULT,	TF_COMMAND, S_TRISTATE},
+#line 35 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str13,	T_DEFAULT,	TF_COMMAND, S_BOOLEAN},
+#line 45 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str14,	T_OPT_DEFCONFIG_LIST,TF_OPTION},
+      {-1}, {-1},
+#line 43 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str17,		T_ON,		TF_PARAM},
+#line 28 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str18,	T_OPTIONAL,	TF_COMMAND},
+      {-1}, {-1},
+#line 42 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str21,		T_OPTION,	TF_COMMAND},
+#line 17 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str22,	T_ENDMENU,	TF_COMMAND},
+#line 15 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str23,	T_MAINMENU,	TF_COMMAND},
+      {-1},
+#line 23 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str25,	T_MENUCONFIG,	TF_COMMAND},
+      {-1},
+#line 44 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str27,	T_OPT_MODULES,	TF_OPTION},
+#line 47 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str28,	T_OPT_ALLNOCONFIG_Y,TF_OPTION},
+#line 16 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str29,		T_MENU,		TF_COMMAND},
+      {-1},
+#line 39 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str31,		T_SELECT,	TF_COMMAND},
+#line 21 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str32,	T_COMMENT,	TF_COMMAND},
+#line 46 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str33,		T_OPT_ENV,	TF_OPTION},
+      {-1},
+#line 40 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str35,		T_RANGE,	TF_COMMAND},
+#line 19 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str36,		T_CHOICE,	TF_COMMAND},
+      {-1}, {-1},
+#line 33 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str39,		T_TYPE,		TF_COMMAND, S_BOOLEAN},
+      {-1},
+#line 18 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str41,		T_SOURCE,	TF_COMMAND},
+#line 41 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str42,	T_VISIBLE,	TF_COMMAND},
+#line 37 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str43,		T_TYPE,		TF_COMMAND, S_HEX},
+      {-1}, {-1},
+#line 22 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str46,		T_CONFIG,	TF_COMMAND},
+#line 34 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str47,	T_TYPE,		TF_COMMAND, S_BOOLEAN},
+      {-1}, {-1}, {-1},
+#line 38 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str51,		T_TYPE,		TF_COMMAND, S_STRING},
+      {-1}, {-1},
+#line 24 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str54,		T_HELP,		TF_COMMAND},
+      {-1},
+#line 30 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str56,		T_PROMPT,	TF_COMMAND},
+      {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+      {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+#line 27 "scripts/kconfig/zconf.gperf"
+      {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str72,	T_DEPENDS,	TF_COMMAND}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register int key = kconf_id_hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)
+        {
+          register int o = wordlist[key].name;
+          if (o >= 0)
+            {
+              register const char *s = o + kconf_id_strings;
+
+              if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+                return &wordlist[key];
+            }
+        }
+    }
+  return 0;
+}
+#line 48 "scripts/kconfig/zconf.gperf"
+
diff -Nur linux-4.2/scripts/kconfig/zconf.lex.c linux-4.2-pck/scripts/kconfig/zconf.lex.c
--- linux-4.2/scripts/kconfig/zconf.lex.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/zconf.lex.c	2015-09-08 01:26:14.842469201 -0300
@@ -0,0 +1,2476 @@
+
+#line 3 "scripts/kconfig/zconf.lex.c_shipped"
+
+#define  YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define yy_create_buffer zconf_create_buffer
+#define yy_delete_buffer zconf_delete_buffer
+#define yy_flex_debug zconf_flex_debug
+#define yy_init_buffer zconf_init_buffer
+#define yy_flush_buffer zconf_flush_buffer
+#define yy_load_buffer_state zconf_load_buffer_state
+#define yy_switch_to_buffer zconf_switch_to_buffer
+#define yyin zconfin
+#define yyleng zconfleng
+#define yylex zconflex
+#define yylineno zconflineno
+#define yyout zconfout
+#define yyrestart zconfrestart
+#define yytext zconftext
+#define yywrap zconfwrap
+#define yyalloc zconfalloc
+#define yyrealloc zconfrealloc
+#define yyfree zconffree
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types. 
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t; 
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+#endif /* ! C99 */
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN               (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN              (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX               (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX              (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX              (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX              (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX             (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else	/* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif	/* defined (__STDC__) */
+#endif	/* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index.  If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition.  This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state.  The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start) - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE zconfrestart(zconfin  )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#define YY_BUF_SIZE 16384
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE   ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+extern int zconfleng;
+
+extern FILE *zconfin, *zconfout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+    #define YY_LESS_LINENO(n)
+    
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+	do \
+		{ \
+		/* Undo effects of setting up zconftext. */ \
+        int yyless_macro_arg = (n); \
+        YY_LESS_LINENO(yyless_macro_arg);\
+		*yy_cp = (yy_hold_char); \
+		YY_RESTORE_YY_MORE_OFFSET \
+		(yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+		YY_DO_BEFORE_ACTION; /* set up zconftext again */ \
+		} \
+	while ( 0 )
+
+#define unput(c) yyunput( c, (yytext_ptr)  )
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+	{
+	FILE *yy_input_file;
+
+	char *yy_ch_buf;		/* input buffer */
+	char *yy_buf_pos;		/* current position in input buffer */
+
+	/* Size of input buffer in bytes, not including room for EOB
+	 * characters.
+	 */
+	yy_size_t yy_buf_size;
+
+	/* Number of characters read into yy_ch_buf, not including EOB
+	 * characters.
+	 */
+	int yy_n_chars;
+
+	/* Whether we "own" the buffer - i.e., we know we created it,
+	 * and can realloc() it to grow it, and should free() it to
+	 * delete it.
+	 */
+	int yy_is_our_buffer;
+
+	/* Whether this is an "interactive" input source; if so, and
+	 * if we're using stdio for input, then we want to use getc()
+	 * instead of fread(), to make sure we stop fetching input after
+	 * each newline.
+	 */
+	int yy_is_interactive;
+
+	/* Whether we're considered to be at the beginning of a line.
+	 * If so, '^' rules will be active on the next match, otherwise
+	 * not.
+	 */
+	int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count. */
+    int yy_bs_column; /**< The column count. */
+    
+	/* Whether to try to fill the input buffer when we reach the
+	 * end of it.
+	 */
+	int yy_fill_buffer;
+
+	int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+	/* When an EOF's been seen but there's still some text to process
+	 * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+	 * shouldn't try reading from the input source any more.  We might
+	 * still have a bunch of tokens to match, though, because of
+	 * possible backing-up.
+	 *
+	 * When we actually see the EOF, we change the status to "new"
+	 * (via zconfrestart()), so that the user can continue scanning by
+	 * just pointing zconfin at a new input file.
+	 */
+#define YY_BUFFER_EOF_PENDING 2
+
+	};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
+static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
+                          ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
+                          : NULL)
+
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when zconftext is formed. */
+static char yy_hold_char;
+static int yy_n_chars;		/* number of characters read into yy_ch_buf */
+int zconfleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 0;		/* whether we need to initialize */
+static int yy_start = 0;	/* start state number */
+
+/* Flag which is used to allow zconfwrap()'s to do buffer switches
+ * instead of setting up a fresh zconfin.  A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void zconfrestart (FILE *input_file  );
+void zconf_switch_to_buffer (YY_BUFFER_STATE new_buffer  );
+YY_BUFFER_STATE zconf_create_buffer (FILE *file,int size  );
+void zconf_delete_buffer (YY_BUFFER_STATE b  );
+void zconf_flush_buffer (YY_BUFFER_STATE b  );
+void zconfpush_buffer_state (YY_BUFFER_STATE new_buffer  );
+void zconfpop_buffer_state (void );
+
+static void zconfensure_buffer_stack (void );
+static void zconf_load_buffer_state (void );
+static void zconf_init_buffer (YY_BUFFER_STATE b,FILE *file  );
+
+#define YY_FLUSH_BUFFER zconf_flush_buffer(YY_CURRENT_BUFFER )
+
+YY_BUFFER_STATE zconf_scan_buffer (char *base,yy_size_t size  );
+YY_BUFFER_STATE zconf_scan_string (yyconst char *yy_str  );
+YY_BUFFER_STATE zconf_scan_bytes (yyconst char *bytes,int len  );
+
+void *zconfalloc (yy_size_t  );
+void *zconfrealloc (void *,yy_size_t  );
+void zconffree (void *  );
+
+#define yy_new_buffer zconf_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+	{ \
+	if ( ! YY_CURRENT_BUFFER ){ \
+        zconfensure_buffer_stack (); \
+		YY_CURRENT_BUFFER_LVALUE =    \
+            zconf_create_buffer(zconfin,YY_BUF_SIZE ); \
+	} \
+	YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+	}
+
+#define yy_set_bol(at_bol) \
+	{ \
+	if ( ! YY_CURRENT_BUFFER ){\
+        zconfensure_buffer_stack (); \
+		YY_CURRENT_BUFFER_LVALUE =    \
+            zconf_create_buffer(zconfin,YY_BUF_SIZE ); \
+	} \
+	YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+	}
+
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define zconfwrap(n) 1
+#define YY_SKIP_YYWRAP
+
+typedef unsigned char YY_CHAR;
+
+FILE *zconfin = (FILE *) 0, *zconfout = (FILE *) 0;
+
+typedef int yy_state_type;
+
+extern int zconflineno;
+
+int zconflineno = 1;
+
+extern char *zconftext;
+#define yytext_ptr zconftext
+static yyconst flex_int16_t yy_nxt[][19] =
+    {
+    {
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0
+    },
+
+    {
+       11,   12,   13,   14,   12,   12,   15,   12,   12,   12,
+       12,   12,   12,   12,   12,   12,   12,   12,   12
+    },
+
+    {
+       11,   12,   13,   14,   12,   12,   15,   12,   12,   12,
+       12,   12,   12,   12,   12,   12,   12,   12,   12
+    },
+
+    {
+       11,   16,   16,   17,   16,   16,   16,   16,   16,   16,
+       16,   16,   16,   18,   16,   16,   16,   16,   16
+    },
+
+    {
+       11,   16,   16,   17,   16,   16,   16,   16,   16,   16,
+       16,   16,   16,   18,   16,   16,   16,   16,   16
+
+    },
+
+    {
+       11,   19,   20,   21,   19,   19,   19,   19,   19,   19,
+       19,   19,   19,   19,   19,   19,   19,   19,   19
+    },
+
+    {
+       11,   19,   20,   21,   19,   19,   19,   19,   19,   19,
+       19,   19,   19,   19,   19,   19,   19,   19,   19
+    },
+
+    {
+       11,   22,   22,   23,   22,   24,   22,   22,   24,   22,
+       22,   22,   22,   22,   22,   22,   22,   25,   22
+    },
+
+    {
+       11,   22,   22,   23,   22,   24,   22,   22,   24,   22,
+       22,   22,   22,   22,   22,   22,   22,   25,   22
+    },
+
+    {
+       11,   26,   27,   28,   29,   30,   31,   32,   30,   33,
+       34,   35,   36,   36,   37,   38,   39,   40,   41
+
+    },
+
+    {
+       11,   26,   27,   28,   29,   30,   31,   32,   30,   33,
+       34,   35,   36,   36,   37,   38,   39,   40,   41
+    },
+
+    {
+      -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,
+      -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11
+    },
+
+    {
+       11,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,
+      -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12
+    },
+
+    {
+       11,  -13,   42,   43,  -13,  -13,   44,  -13,  -13,  -13,
+      -13,  -13,  -13,  -13,  -13,  -13,  -13,  -13,  -13
+    },
+
+    {
+       11,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,
+      -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14
+
+    },
+
+    {
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
+    },
+
+    {
+       11,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,
+      -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16
+    },
+
+    {
+       11,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,
+      -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17
+    },
+
+    {
+       11,  -18,  -18,  -18,  -18,  -18,  -18,  -18,  -18,  -18,
+      -18,  -18,  -18,   47,  -18,  -18,  -18,  -18,  -18
+    },
+
+    {
+       11,   48,   48,  -19,   48,   48,   48,   48,   48,   48,
+       48,   48,   48,   48,   48,   48,   48,   48,   48
+
+    },
+
+    {
+       11,  -20,   49,   50,  -20,  -20,  -20,  -20,  -20,  -20,
+      -20,  -20,  -20,  -20,  -20,  -20,  -20,  -20,  -20
+    },
+
+    {
+       11,   51,  -21,  -21,   51,   51,   51,   51,   51,   51,
+       51,   51,   51,   51,   51,   51,   51,   51,   51
+    },
+
+    {
+       11,   52,   52,   53,   52,  -22,   52,   52,  -22,   52,
+       52,   52,   52,   52,   52,   52,   52,  -22,   52
+    },
+
+    {
+       11,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,
+      -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23
+    },
+
+    {
+       11,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,
+      -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24
+
+    },
+
+    {
+       11,   54,   54,   55,   54,   54,   54,   54,   54,   54,
+       54,   54,   54,   54,   54,   54,   54,   54,   54
+    },
+
+    {
+       11,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,
+      -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26
+    },
+
+    {
+       11,  -27,   56,  -27,  -27,  -27,  -27,  -27,  -27,  -27,
+      -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27
+    },
+
+    {
+       11,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,
+      -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28
+    },
+
+    {
+       11,  -29,  -29,  -29,  -29,  -29,  -29,  -29,  -29,  -29,
+      -29,  -29,  -29,  -29,  -29,   57,  -29,  -29,  -29
+
+    },
+
+    {
+       11,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,
+      -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30
+    },
+
+    {
+       11,   58,   58,  -31,   58,   58,   58,   58,   58,   58,
+       58,   58,   58,   58,   58,   58,   58,   58,   58
+    },
+
+    {
+       11,  -32,  -32,  -32,  -32,  -32,  -32,   59,  -32,  -32,
+      -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32
+    },
+
+    {
+       11,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,
+      -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33
+    },
+
+    {
+       11,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,
+      -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34
+
+    },
+
+    {
+       11,  -35,  -35,  -35,  -35,  -35,  -35,  -35,  -35,  -35,
+      -35,   60,   61,   61,  -35,  -35,  -35,  -35,  -35
+    },
+
+    {
+       11,  -36,  -36,  -36,  -36,  -36,  -36,  -36,  -36,  -36,
+      -36,   61,   61,   61,  -36,  -36,  -36,  -36,  -36
+    },
+
+    {
+       11,  -37,  -37,  -37,  -37,  -37,  -37,  -37,  -37,  -37,
+      -37,  -37,  -37,  -37,  -37,   62,  -37,  -37,  -37
+    },
+
+    {
+       11,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,
+      -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38
+    },
+
+    {
+       11,  -39,  -39,  -39,  -39,  -39,  -39,  -39,  -39,  -39,
+      -39,  -39,  -39,  -39,  -39,   63,  -39,  -39,  -39
+
+    },
+
+    {
+       11,  -40,  -40,   64,  -40,  -40,  -40,  -40,  -40,  -40,
+      -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40
+    },
+
+    {
+       11,  -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,
+      -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,   65
+    },
+
+    {
+       11,  -42,   42,   43,  -42,  -42,   44,  -42,  -42,  -42,
+      -42,  -42,  -42,  -42,  -42,  -42,  -42,  -42,  -42
+    },
+
+    {
+       11,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,
+      -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43
+    },
+
+    {
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
+
+    },
+
+    {
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
+    },
+
+    {
+       11,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,
+      -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46
+    },
+
+    {
+       11,  -47,  -47,  -47,  -47,  -47,  -47,  -47,  -47,  -47,
+      -47,  -47,  -47,   47,  -47,  -47,  -47,  -47,  -47
+    },
+
+    {
+       11,   48,   48,  -48,   48,   48,   48,   48,   48,   48,
+       48,   48,   48,   48,   48,   48,   48,   48,   48
+    },
+
+    {
+       11,  -49,   49,   50,  -49,  -49,  -49,  -49,  -49,  -49,
+      -49,  -49,  -49,  -49,  -49,  -49,  -49,  -49,  -49
+
+    },
+
+    {
+       11,   51,  -50,  -50,   51,   51,   51,   51,   51,   51,
+       51,   51,   51,   51,   51,   51,   51,   51,   51
+    },
+
+    {
+       11,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,
+      -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51
+    },
+
+    {
+       11,   52,   52,   53,   52,  -52,   52,   52,  -52,   52,
+       52,   52,   52,   52,   52,   52,   52,  -52,   52
+    },
+
+    {
+       11,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,
+      -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53
+    },
+
+    {
+       11,  -54,  -54,   55,  -54,  -54,  -54,  -54,  -54,  -54,
+      -54,  -54,  -54,  -54,  -54,  -54,  -54,  -54,  -54
+
+    },
+
+    {
+       11,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,
+      -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55
+    },
+
+    {
+       11,  -56,   56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,
+      -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56
+    },
+
+    {
+       11,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,
+      -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57
+    },
+
+    {
+       11,   58,   58,  -58,   58,   58,   58,   58,   58,   58,
+       58,   58,   58,   58,   58,   58,   58,   58,   58
+    },
+
+    {
+       11,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,
+      -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59
+
+    },
+
+    {
+       11,  -60,  -60,  -60,  -60,  -60,  -60,  -60,  -60,  -60,
+      -60,   66,   61,   61,  -60,  -60,  -60,  -60,  -60
+    },
+
+    {
+       11,  -61,  -61,  -61,  -61,  -61,  -61,  -61,  -61,  -61,
+      -61,   61,   61,   61,  -61,  -61,  -61,  -61,  -61
+    },
+
+    {
+       11,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,
+      -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62
+    },
+
+    {
+       11,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,
+      -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63
+    },
+
+    {
+       11,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,
+      -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64
+
+    },
+
+    {
+       11,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,
+      -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65
+    },
+
+    {
+       11,  -66,  -66,  -66,  -66,  -66,  -66,  -66,  -66,  -66,
+      -66,   61,   61,   61,  -66,  -66,  -66,  -66,  -66
+    },
+
+    } ;
+
+static yy_state_type yy_get_previous_state (void );
+static yy_state_type yy_try_NUL_trans (yy_state_type current_state  );
+static int yy_get_next_buffer (void );
+static void yy_fatal_error (yyconst char msg[]  );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up zconftext.
+ */
+#define YY_DO_BEFORE_ACTION \
+	(yytext_ptr) = yy_bp; \
+	zconfleng = (size_t) (yy_cp - yy_bp); \
+	(yy_hold_char) = *yy_cp; \
+	*yy_cp = '\0'; \
+	(yy_c_buf_p) = yy_cp;
+
+#define YY_NUM_RULES 38
+#define YY_END_OF_BUFFER 39
+/* This struct is not used in this scanner,
+   but its presence is necessary. */
+struct yy_trans_info
+	{
+	flex_int32_t yy_verify;
+	flex_int32_t yy_nxt;
+	};
+static yyconst flex_int16_t yy_accept[67] =
+    {   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+       39,    5,    4,    2,    3,    7,    8,    6,   37,   34,
+       36,   29,   33,   32,   31,   27,   26,   21,   13,   20,
+       24,   27,   11,   12,   23,   23,   18,   14,   19,   27,
+       27,    4,    2,    3,    3,    1,    6,   37,   34,   36,
+       35,   29,   28,   31,   30,   26,   15,   24,    9,   23,
+       23,   16,   17,   25,   10,   22
+    } ;
+
+static yyconst flex_int32_t yy_ec[256] =
+    {   0,
+        1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    2,    4,    5,    6,    1,    1,    7,    8,    9,
+       10,    1,    1,    1,   11,   12,   12,   13,   13,   13,
+       13,   13,   13,   13,   13,   13,   13,    1,    1,   14,
+       15,   16,    1,    1,   13,   13,   13,   13,   13,   13,
+       13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
+       13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
+        1,   17,    1,    1,   13,    1,   13,   13,   13,   13,
+
+       13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
+       13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
+       13,   13,    1,   18,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1
+    } ;
+
+extern int zconf_flex_debug;
+int zconf_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *zconftext;
+#define YY_NO_INPUT 1
+
+/*
+ * Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "lkc.h"
+
+#define START_STRSIZE	16
+
+static struct {
+	struct file *file;
+	int lineno;
+} current_pos;
+
+static char *text;
+static int text_size, text_asize;
+
+struct buffer {
+	struct buffer *parent;
+	YY_BUFFER_STATE state;
+};
+
+struct buffer *current_buf;
+
+static int last_ts, first_ts;
+
+static void zconf_endhelp(void);
+static void zconf_endfile(void);
+
+static void new_string(void)
+{
+	text = xmalloc(START_STRSIZE);
+	text_asize = START_STRSIZE;
+	text_size = 0;
+	*text = 0;
+}
+
+static void append_string(const char *str, int size)
+{
+	int new_size = text_size + size + 1;
+	if (new_size > text_asize) {
+		new_size += START_STRSIZE - 1;
+		new_size &= -START_STRSIZE;
+		text = realloc(text, new_size);
+		text_asize = new_size;
+	}
+	memcpy(text + text_size, str, size);
+	text_size += size;
+	text[text_size] = 0;
+}
+
+static void alloc_string(const char *str, int size)
+{
+	text = xmalloc(size + 1);
+	memcpy(text, str, size);
+	text[size] = 0;
+}
+
+#define INITIAL 0
+#define COMMAND 1
+#define HELP 2
+#define STRING 3
+#define PARAM 4
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals (void );
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int zconflex_destroy (void );
+
+int zconfget_debug (void );
+
+void zconfset_debug (int debug_flag  );
+
+YY_EXTRA_TYPE zconfget_extra (void );
+
+void zconfset_extra (YY_EXTRA_TYPE user_defined  );
+
+FILE *zconfget_in (void );
+
+void zconfset_in  (FILE * in_str  );
+
+FILE *zconfget_out (void );
+
+void zconfset_out  (FILE * out_str  );
+
+int zconfget_leng (void );
+
+char *zconfget_text (void );
+
+int zconfget_lineno (void );
+
+void zconfset_lineno (int line_number  );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int zconfwrap (void );
+#else
+extern int zconfwrap (void );
+#endif
+#endif
+
+    static void yyunput (int c,char *buf_ptr  );
+    
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int );
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * );
+#endif
+
+#ifndef YY_NO_INPUT
+
+#ifdef __cplusplus
+static int yyinput (void );
+#else
+static int input (void );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO fwrite( zconftext, zconfleng, 1, zconfout )
+#endif
+
+/* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+	errno=0; \
+	while ( (result = read( fileno(zconfin), (char *) buf, max_size )) < 0 ) \
+	{ \
+		if( errno != EINTR) \
+		{ \
+			YY_FATAL_ERROR( "input in flex scanner failed" ); \
+			break; \
+		} \
+		errno=0; \
+		clearerr(zconfin); \
+	}\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int zconflex (void);
+
+#define YY_DECL int zconflex (void)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after zconftext and zconfleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+	YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+	register yy_state_type yy_current_state;
+	register char *yy_cp, *yy_bp;
+	register int yy_act;
+    
+	int str = 0;
+	int ts, i;
+
+	if ( !(yy_init) )
+		{
+		(yy_init) = 1;
+
+#ifdef YY_USER_INIT
+		YY_USER_INIT;
+#endif
+
+		if ( ! (yy_start) )
+			(yy_start) = 1;	/* first start state */
+
+		if ( ! zconfin )
+			zconfin = stdin;
+
+		if ( ! zconfout )
+			zconfout = stdout;
+
+		if ( ! YY_CURRENT_BUFFER ) {
+			zconfensure_buffer_stack ();
+			YY_CURRENT_BUFFER_LVALUE =
+				zconf_create_buffer(zconfin,YY_BUF_SIZE );
+		}
+
+		zconf_load_buffer_state( );
+		}
+
+	while ( 1 )		/* loops until end-of-file is reached */
+		{
+		yy_cp = (yy_c_buf_p);
+
+		/* Support of zconftext. */
+		*yy_cp = (yy_hold_char);
+
+		/* yy_bp points to the position in yy_ch_buf of the start of
+		 * the current run.
+		 */
+		yy_bp = yy_cp;
+
+		yy_current_state = (yy_start);
+yy_match:
+		while ( (yy_current_state = yy_nxt[yy_current_state][ yy_ec[YY_SC_TO_UI(*yy_cp)]  ]) > 0 )
+			++yy_cp;
+
+		yy_current_state = -yy_current_state;
+
+yy_find_action:
+		yy_act = yy_accept[yy_current_state];
+
+		YY_DO_BEFORE_ACTION;
+
+do_action:	/* This label is used only to access EOF actions. */
+
+		switch ( yy_act )
+	{ /* beginning of action switch */
+case 1:
+/* rule 1 can match eol */
+case 2:
+/* rule 2 can match eol */
+YY_RULE_SETUP
+{
+	current_file->lineno++;
+	return T_EOL;
+}
+	YY_BREAK
+case 3:
+YY_RULE_SETUP
+
+	YY_BREAK
+case 4:
+YY_RULE_SETUP
+{
+	BEGIN(COMMAND);
+}
+	YY_BREAK
+case 5:
+YY_RULE_SETUP
+{
+	unput(zconftext[0]);
+	BEGIN(COMMAND);
+}
+	YY_BREAK
+
+case 6:
+YY_RULE_SETUP
+{
+		const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng);
+		BEGIN(PARAM);
+		current_pos.file = current_file;
+		current_pos.lineno = current_file->lineno;
+		if (id && id->flags & TF_COMMAND) {
+			zconflval.id = id;
+			return id->token;
+		}
+		alloc_string(zconftext, zconfleng);
+		zconflval.string = text;
+		return T_WORD;
+	}
+	YY_BREAK
+case 7:
+YY_RULE_SETUP
+
+	YY_BREAK
+case 8:
+/* rule 8 can match eol */
+YY_RULE_SETUP
+{
+		BEGIN(INITIAL);
+		current_file->lineno++;
+		return T_EOL;
+	}
+	YY_BREAK
+
+case 9:
+YY_RULE_SETUP
+return T_AND;
+	YY_BREAK
+case 10:
+YY_RULE_SETUP
+return T_OR;
+	YY_BREAK
+case 11:
+YY_RULE_SETUP
+return T_OPEN_PAREN;
+	YY_BREAK
+case 12:
+YY_RULE_SETUP
+return T_CLOSE_PAREN;
+	YY_BREAK
+case 13:
+YY_RULE_SETUP
+return T_NOT;
+	YY_BREAK
+case 14:
+YY_RULE_SETUP
+return T_EQUAL;
+	YY_BREAK
+case 15:
+YY_RULE_SETUP
+return T_UNEQUAL;
+	YY_BREAK
+case 16:
+YY_RULE_SETUP
+return T_LESS_EQUAL;
+	YY_BREAK
+case 17:
+YY_RULE_SETUP
+return T_GREATER_EQUAL;
+	YY_BREAK
+case 18:
+YY_RULE_SETUP
+return T_LESS;
+	YY_BREAK
+case 19:
+YY_RULE_SETUP
+return T_GREATER;
+	YY_BREAK
+case 20:
+YY_RULE_SETUP
+{
+		str = zconftext[0];
+		new_string();
+		BEGIN(STRING);
+	}
+	YY_BREAK
+case 21:
+/* rule 21 can match eol */
+YY_RULE_SETUP
+BEGIN(INITIAL); current_file->lineno++; return T_EOL;
+	YY_BREAK
+case 22:
+YY_RULE_SETUP
+/* ignore */
+	YY_BREAK
+case 23:
+YY_RULE_SETUP
+{
+		const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng);
+		if (id && id->flags & TF_PARAM) {
+			zconflval.id = id;
+			return id->token;
+		}
+		alloc_string(zconftext, zconfleng);
+		zconflval.string = text;
+		return T_WORD;
+	}
+	YY_BREAK
+case 24:
+YY_RULE_SETUP
+/* comment */
+	YY_BREAK
+case 25:
+/* rule 25 can match eol */
+YY_RULE_SETUP
+current_file->lineno++;
+	YY_BREAK
+case 26:
+YY_RULE_SETUP
+
+	YY_BREAK
+case 27:
+YY_RULE_SETUP
+{
+		fprintf(stderr,
+		        "%s:%d:warning: ignoring unsupported character '%c'\n",
+		        zconf_curname(), zconf_lineno(), *zconftext);
+	}
+	YY_BREAK
+case YY_STATE_EOF(PARAM):
+{
+		BEGIN(INITIAL);
+	}
+	YY_BREAK
+
+case 28:
+/* rule 28 can match eol */
+*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
+(yy_c_buf_p) = yy_cp -= 1;
+YY_DO_BEFORE_ACTION; /* set up zconftext again */
+YY_RULE_SETUP
+{
+		append_string(zconftext, zconfleng);
+		zconflval.string = text;
+		return T_WORD_QUOTE;
+	}
+	YY_BREAK
+case 29:
+YY_RULE_SETUP
+{
+		append_string(zconftext, zconfleng);
+	}
+	YY_BREAK
+case 30:
+/* rule 30 can match eol */
+*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
+(yy_c_buf_p) = yy_cp -= 1;
+YY_DO_BEFORE_ACTION; /* set up zconftext again */
+YY_RULE_SETUP
+{
+		append_string(zconftext + 1, zconfleng - 1);
+		zconflval.string = text;
+		return T_WORD_QUOTE;
+	}
+	YY_BREAK
+case 31:
+YY_RULE_SETUP
+{
+		append_string(zconftext + 1, zconfleng - 1);
+	}
+	YY_BREAK
+case 32:
+YY_RULE_SETUP
+{
+		if (str == zconftext[0]) {
+			BEGIN(PARAM);
+			zconflval.string = text;
+			return T_WORD_QUOTE;
+		} else
+			append_string(zconftext, 1);
+	}
+	YY_BREAK
+case 33:
+/* rule 33 can match eol */
+YY_RULE_SETUP
+{
+		printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno());
+		current_file->lineno++;
+		BEGIN(INITIAL);
+		return T_EOL;
+	}
+	YY_BREAK
+case YY_STATE_EOF(STRING):
+{
+		BEGIN(INITIAL);
+	}
+	YY_BREAK
+
+case 34:
+YY_RULE_SETUP
+{
+		ts = 0;
+		for (i = 0; i < zconfleng; i++) {
+			if (zconftext[i] == '\t')
+				ts = (ts & ~7) + 8;
+			else
+				ts++;
+		}
+		last_ts = ts;
+		if (first_ts) {
+			if (ts < first_ts) {
+				zconf_endhelp();
+				return T_HELPTEXT;
+			}
+			ts -= first_ts;
+			while (ts > 8) {
+				append_string("        ", 8);
+				ts -= 8;
+			}
+			append_string("        ", ts);
+		}
+	}
+	YY_BREAK
+case 35:
+/* rule 35 can match eol */
+*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
+(yy_c_buf_p) = yy_cp -= 1;
+YY_DO_BEFORE_ACTION; /* set up zconftext again */
+YY_RULE_SETUP
+{
+		current_file->lineno++;
+		zconf_endhelp();
+		return T_HELPTEXT;
+	}
+	YY_BREAK
+case 36:
+/* rule 36 can match eol */
+YY_RULE_SETUP
+{
+		current_file->lineno++;
+		append_string("\n", 1);
+	}
+	YY_BREAK
+case 37:
+YY_RULE_SETUP
+{
+		while (zconfleng) {
+			if ((zconftext[zconfleng-1] != ' ') && (zconftext[zconfleng-1] != '\t'))
+				break;
+			zconfleng--;
+		}
+		append_string(zconftext, zconfleng);
+		if (!first_ts)
+			first_ts = last_ts;
+	}
+	YY_BREAK
+case YY_STATE_EOF(HELP):
+{
+		zconf_endhelp();
+		return T_HELPTEXT;
+	}
+	YY_BREAK
+
+case YY_STATE_EOF(INITIAL):
+case YY_STATE_EOF(COMMAND):
+{
+	if (current_file) {
+		zconf_endfile();
+		return T_EOL;
+	}
+	fclose(zconfin);
+	yyterminate();
+}
+	YY_BREAK
+case 38:
+YY_RULE_SETUP
+YY_FATAL_ERROR( "flex scanner jammed" );
+	YY_BREAK
+
+	case YY_END_OF_BUFFER:
+		{
+		/* Amount of text matched not including the EOB char. */
+		int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+		/* Undo the effects of YY_DO_BEFORE_ACTION. */
+		*yy_cp = (yy_hold_char);
+		YY_RESTORE_YY_MORE_OFFSET
+
+		if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+			{
+			/* We're scanning a new file or input source.  It's
+			 * possible that this happened because the user
+			 * just pointed zconfin at a new source and called
+			 * zconflex().  If so, then we have to assure
+			 * consistency between YY_CURRENT_BUFFER and our
+			 * globals.  Here is the right place to do so, because
+			 * this is the first action (other than possibly a
+			 * back-up) that will match for the new input source.
+			 */
+			(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+			YY_CURRENT_BUFFER_LVALUE->yy_input_file = zconfin;
+			YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+			}
+
+		/* Note that here we test for yy_c_buf_p "<=" to the position
+		 * of the first EOB in the buffer, since yy_c_buf_p will
+		 * already have been incremented past the NUL character
+		 * (since all states make transitions on EOB to the
+		 * end-of-buffer state).  Contrast this with the test
+		 * in input().
+		 */
+		if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+			{ /* This was really a NUL. */
+			yy_state_type yy_next_state;
+
+			(yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+			yy_current_state = yy_get_previous_state(  );
+
+			/* Okay, we're now positioned to make the NUL
+			 * transition.  We couldn't have
+			 * yy_get_previous_state() go ahead and do it
+			 * for us because it doesn't know how to deal
+			 * with the possibility of jamming (and we don't
+			 * want to build jamming into it because then it
+			 * will run more slowly).
+			 */
+
+			yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+			yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+			if ( yy_next_state )
+				{
+				/* Consume the NUL. */
+				yy_cp = ++(yy_c_buf_p);
+				yy_current_state = yy_next_state;
+				goto yy_match;
+				}
+
+			else
+				{
+				yy_cp = (yy_c_buf_p);
+				goto yy_find_action;
+				}
+			}
+
+		else switch ( yy_get_next_buffer(  ) )
+			{
+			case EOB_ACT_END_OF_FILE:
+				{
+				(yy_did_buffer_switch_on_eof) = 0;
+
+				if ( zconfwrap( ) )
+					{
+					/* Note: because we've taken care in
+					 * yy_get_next_buffer() to have set up
+					 * zconftext, we can now set up
+					 * yy_c_buf_p so that if some total
+					 * hoser (like flex itself) wants to
+					 * call the scanner after we return the
+					 * YY_NULL, it'll still work - another
+					 * YY_NULL will get returned.
+					 */
+					(yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+					yy_act = YY_STATE_EOF(YY_START);
+					goto do_action;
+					}
+
+				else
+					{
+					if ( ! (yy_did_buffer_switch_on_eof) )
+						YY_NEW_FILE;
+					}
+				break;
+				}
+
+			case EOB_ACT_CONTINUE_SCAN:
+				(yy_c_buf_p) =
+					(yytext_ptr) + yy_amount_of_matched_text;
+
+				yy_current_state = yy_get_previous_state(  );
+
+				yy_cp = (yy_c_buf_p);
+				yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+				goto yy_match;
+
+			case EOB_ACT_LAST_MATCH:
+				(yy_c_buf_p) =
+				&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+				yy_current_state = yy_get_previous_state(  );
+
+				yy_cp = (yy_c_buf_p);
+				yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+				goto yy_find_action;
+			}
+		break;
+		}
+
+	default:
+		YY_FATAL_ERROR(
+			"fatal flex scanner internal error--no action found" );
+	} /* end of action switch */
+		} /* end of scanning one token */
+} /* end of zconflex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ *	EOB_ACT_LAST_MATCH -
+ *	EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ *	EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (void)
+{
+    	register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+	register char *source = (yytext_ptr);
+	register int number_to_move, i;
+	int ret_val;
+
+	if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+		YY_FATAL_ERROR(
+		"fatal flex scanner internal error--end of buffer missed" );
+
+	if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+		{ /* Don't try to fill the buffer, so this is an EOF. */
+		if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+			{
+			/* We matched a single character, the EOB, so
+			 * treat this as a final EOF.
+			 */
+			return EOB_ACT_END_OF_FILE;
+			}
+
+		else
+			{
+			/* We matched some text prior to the EOB, first
+			 * process it.
+			 */
+			return EOB_ACT_LAST_MATCH;
+			}
+		}
+
+	/* Try to read more data. */
+
+	/* First move last chars to start of buffer. */
+	number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+
+	for ( i = 0; i < number_to_move; ++i )
+		*(dest++) = *(source++);
+
+	if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+		/* don't do the read, it's not guaranteed to return an EOF,
+		 * just force an EOF
+		 */
+		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+	else
+		{
+			int num_to_read =
+			YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+		while ( num_to_read <= 0 )
+			{ /* Not enough room in the buffer - grow it. */
+
+			/* just a shorter name for the current buffer */
+			YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+
+			int yy_c_buf_p_offset =
+				(int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+			if ( b->yy_is_our_buffer )
+				{
+				int new_size = b->yy_buf_size * 2;
+
+				if ( new_size <= 0 )
+					b->yy_buf_size += b->yy_buf_size / 8;
+				else
+					b->yy_buf_size *= 2;
+
+				b->yy_ch_buf = (char *)
+					/* Include room in for 2 EOB chars. */
+					zconfrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2  );
+				}
+			else
+				/* Can't grow it, we don't own it. */
+				b->yy_ch_buf = 0;
+
+			if ( ! b->yy_ch_buf )
+				YY_FATAL_ERROR(
+				"fatal error - scanner input buffer overflow" );
+
+			(yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+			num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+						number_to_move - 1;
+
+			}
+
+		if ( num_to_read > YY_READ_BUF_SIZE )
+			num_to_read = YY_READ_BUF_SIZE;
+
+		/* Read in more data. */
+		YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+			(yy_n_chars), (size_t) num_to_read );
+
+		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+		}
+
+	if ( (yy_n_chars) == 0 )
+		{
+		if ( number_to_move == YY_MORE_ADJ )
+			{
+			ret_val = EOB_ACT_END_OF_FILE;
+			zconfrestart(zconfin  );
+			}
+
+		else
+			{
+			ret_val = EOB_ACT_LAST_MATCH;
+			YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+				YY_BUFFER_EOF_PENDING;
+			}
+		}
+
+	else
+		ret_val = EOB_ACT_CONTINUE_SCAN;
+
+	if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+		/* Extend the array by 50%, plus the number we really need. */
+		yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+		YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) zconfrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size  );
+		if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+			YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+	}
+
+	(yy_n_chars) += number_to_move;
+	YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+	YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+	(yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+	return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+    static yy_state_type yy_get_previous_state (void)
+{
+	register yy_state_type yy_current_state;
+	register char *yy_cp;
+    
+	yy_current_state = (yy_start);
+
+	for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
+		{
+		yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)];
+		}
+
+	return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ *	next_state = yy_try_NUL_trans( current_state );
+ */
+    static yy_state_type yy_try_NUL_trans  (yy_state_type yy_current_state )
+{
+	register int yy_is_jam;
+    
+	yy_current_state = yy_nxt[yy_current_state][1];
+	yy_is_jam = (yy_current_state <= 0);
+
+	return yy_is_jam ? 0 : yy_current_state;
+}
+
+    static void yyunput (int c, register char * yy_bp )
+{
+	register char *yy_cp;
+    
+    yy_cp = (yy_c_buf_p);
+
+	/* undo effects of setting up zconftext */
+	*yy_cp = (yy_hold_char);
+
+	if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+		{ /* need to shift things up to make room */
+		/* +2 for EOB chars. */
+		register int number_to_move = (yy_n_chars) + 2;
+		register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
+					YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
+		register char *source =
+				&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
+
+		while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+			*--dest = *--source;
+
+		yy_cp += (int) (dest - source);
+		yy_bp += (int) (dest - source);
+		YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
+			(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
+
+		if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+			YY_FATAL_ERROR( "flex scanner push-back overflow" );
+		}
+
+	*--yy_cp = (char) c;
+
+	(yytext_ptr) = yy_bp;
+	(yy_hold_char) = *yy_cp;
+	(yy_c_buf_p) = yy_cp;
+}
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+    static int yyinput (void)
+#else
+    static int input  (void)
+#endif
+
+{
+	int c;
+    
+	*(yy_c_buf_p) = (yy_hold_char);
+
+	if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+		{
+		/* yy_c_buf_p now points to the character we want to return.
+		 * If this occurs *before* the EOB characters, then it's a
+		 * valid NUL; if not, then we've hit the end of the buffer.
+		 */
+		if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+			/* This was really a NUL. */
+			*(yy_c_buf_p) = '\0';
+
+		else
+			{ /* need more input */
+			int offset = (yy_c_buf_p) - (yytext_ptr);
+			++(yy_c_buf_p);
+
+			switch ( yy_get_next_buffer(  ) )
+				{
+				case EOB_ACT_LAST_MATCH:
+					/* This happens because yy_g_n_b()
+					 * sees that we've accumulated a
+					 * token and flags that we need to
+					 * try matching the token before
+					 * proceeding.  But for input(),
+					 * there's no matching to consider.
+					 * So convert the EOB_ACT_LAST_MATCH
+					 * to EOB_ACT_END_OF_FILE.
+					 */
+
+					/* Reset buffer status. */
+					zconfrestart(zconfin );
+
+					/*FALLTHROUGH*/
+
+				case EOB_ACT_END_OF_FILE:
+					{
+					if ( zconfwrap( ) )
+						return EOF;
+
+					if ( ! (yy_did_buffer_switch_on_eof) )
+						YY_NEW_FILE;
+#ifdef __cplusplus
+					return yyinput();
+#else
+					return input();
+#endif
+					}
+
+				case EOB_ACT_CONTINUE_SCAN:
+					(yy_c_buf_p) = (yytext_ptr) + offset;
+					break;
+				}
+			}
+		}
+
+	c = *(unsigned char *) (yy_c_buf_p);	/* cast for 8-bit char's */
+	*(yy_c_buf_p) = '\0';	/* preserve zconftext */
+	(yy_hold_char) = *++(yy_c_buf_p);
+
+	return c;
+}
+#endif	/* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ * 
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+    void zconfrestart  (FILE * input_file )
+{
+    
+	if ( ! YY_CURRENT_BUFFER ){
+        zconfensure_buffer_stack ();
+		YY_CURRENT_BUFFER_LVALUE =
+            zconf_create_buffer(zconfin,YY_BUF_SIZE );
+	}
+
+	zconf_init_buffer(YY_CURRENT_BUFFER,input_file );
+	zconf_load_buffer_state( );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ * 
+ */
+    void zconf_switch_to_buffer  (YY_BUFFER_STATE  new_buffer )
+{
+    
+	/* TODO. We should be able to replace this entire function body
+	 * with
+	 *		zconfpop_buffer_state();
+	 *		zconfpush_buffer_state(new_buffer);
+     */
+	zconfensure_buffer_stack ();
+	if ( YY_CURRENT_BUFFER == new_buffer )
+		return;
+
+	if ( YY_CURRENT_BUFFER )
+		{
+		/* Flush out information for old buffer. */
+		*(yy_c_buf_p) = (yy_hold_char);
+		YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+		}
+
+	YY_CURRENT_BUFFER_LVALUE = new_buffer;
+	zconf_load_buffer_state( );
+
+	/* We don't actually know whether we did this switch during
+	 * EOF (zconfwrap()) processing, but the only time this flag
+	 * is looked at is after zconfwrap() is called, so it's safe
+	 * to go ahead and always set it.
+	 */
+	(yy_did_buffer_switch_on_eof) = 1;
+}
+
+static void zconf_load_buffer_state  (void)
+{
+    	(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+	(yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+	zconfin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+	(yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ * 
+ * @return the allocated buffer state.
+ */
+    YY_BUFFER_STATE zconf_create_buffer  (FILE * file, int  size )
+{
+	YY_BUFFER_STATE b;
+    
+	b = (YY_BUFFER_STATE) zconfalloc(sizeof( struct yy_buffer_state )  );
+	if ( ! b )
+		YY_FATAL_ERROR( "out of dynamic memory in zconf_create_buffer()" );
+
+	b->yy_buf_size = size;
+
+	/* yy_ch_buf has to be 2 characters longer than the size given because
+	 * we need to put in 2 end-of-buffer characters.
+	 */
+	b->yy_ch_buf = (char *) zconfalloc(b->yy_buf_size + 2  );
+	if ( ! b->yy_ch_buf )
+		YY_FATAL_ERROR( "out of dynamic memory in zconf_create_buffer()" );
+
+	b->yy_is_our_buffer = 1;
+
+	zconf_init_buffer(b,file );
+
+	return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with zconf_create_buffer()
+ * 
+ */
+    void zconf_delete_buffer (YY_BUFFER_STATE  b )
+{
+    
+	if ( ! b )
+		return;
+
+	if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+		YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+	if ( b->yy_is_our_buffer )
+		zconffree((void *) b->yy_ch_buf  );
+
+	zconffree((void *) b  );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a zconfrestart() or at EOF.
+ */
+    static void zconf_init_buffer  (YY_BUFFER_STATE  b, FILE * file )
+
+{
+	int oerrno = errno;
+    
+	zconf_flush_buffer(b );
+
+	b->yy_input_file = file;
+	b->yy_fill_buffer = 1;
+
+    /* If b is the current buffer, then zconf_init_buffer was _probably_
+     * called from zconfrestart() or through yy_get_next_buffer.
+     * In that case, we don't want to reset the lineno or column.
+     */
+    if (b != YY_CURRENT_BUFFER){
+        b->yy_bs_lineno = 1;
+        b->yy_bs_column = 0;
+    }
+
+        b->yy_is_interactive = 0;
+    
+	errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ * 
+ */
+    void zconf_flush_buffer (YY_BUFFER_STATE  b )
+{
+    	if ( ! b )
+		return;
+
+	b->yy_n_chars = 0;
+
+	/* We always need two end-of-buffer characters.  The first causes
+	 * a transition to the end-of-buffer state.  The second causes
+	 * a jam in that state.
+	 */
+	b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+	b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+	b->yy_buf_pos = &b->yy_ch_buf[0];
+
+	b->yy_at_bol = 1;
+	b->yy_buffer_status = YY_BUFFER_NEW;
+
+	if ( b == YY_CURRENT_BUFFER )
+		zconf_load_buffer_state( );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ *  the current state. This function will allocate the stack
+ *  if necessary.
+ *  @param new_buffer The new state.
+ *  
+ */
+void zconfpush_buffer_state (YY_BUFFER_STATE new_buffer )
+{
+    	if (new_buffer == NULL)
+		return;
+
+	zconfensure_buffer_stack();
+
+	/* This block is copied from zconf_switch_to_buffer. */
+	if ( YY_CURRENT_BUFFER )
+		{
+		/* Flush out information for old buffer. */
+		*(yy_c_buf_p) = (yy_hold_char);
+		YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+		}
+
+	/* Only push if top exists. Otherwise, replace top. */
+	if (YY_CURRENT_BUFFER)
+		(yy_buffer_stack_top)++;
+	YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+	/* copied from zconf_switch_to_buffer. */
+	zconf_load_buffer_state( );
+	(yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ *  The next element becomes the new top.
+ *  
+ */
+void zconfpop_buffer_state (void)
+{
+    	if (!YY_CURRENT_BUFFER)
+		return;
+
+	zconf_delete_buffer(YY_CURRENT_BUFFER );
+	YY_CURRENT_BUFFER_LVALUE = NULL;
+	if ((yy_buffer_stack_top) > 0)
+		--(yy_buffer_stack_top);
+
+	if (YY_CURRENT_BUFFER) {
+		zconf_load_buffer_state( );
+		(yy_did_buffer_switch_on_eof) = 1;
+	}
+}
+
+/* Allocates the stack if it does not exist.
+ *  Guarantees space for at least one push.
+ */
+static void zconfensure_buffer_stack (void)
+{
+	int num_to_alloc;
+    
+	if (!(yy_buffer_stack)) {
+
+		/* First allocation is just for one element, since we don't know if this
+		 * scanner will even need a stack; if it does, the stack grows by a
+		 * further 8 slots at a time below.
+         */
+		num_to_alloc = 1;
+		(yy_buffer_stack) = (struct yy_buffer_state**)zconfalloc
+								(num_to_alloc * sizeof(struct yy_buffer_state*)
+								);
+		if ( ! (yy_buffer_stack) )
+			YY_FATAL_ERROR( "out of dynamic memory in zconfensure_buffer_stack()" );
+								  
+		memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+				
+		(yy_buffer_stack_max) = num_to_alloc;
+		(yy_buffer_stack_top) = 0;
+		return;
+	}
+
+	if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+		/* Increase the buffer to prepare for a possible push. */
+		int grow_size = 8 /* arbitrary grow size */;
+
+		num_to_alloc = (yy_buffer_stack_max) + grow_size;
+		(yy_buffer_stack) = (struct yy_buffer_state**)zconfrealloc
+								((yy_buffer_stack),
+								num_to_alloc * sizeof(struct yy_buffer_state*)
+								);
+		if ( ! (yy_buffer_stack) )
+			YY_FATAL_ERROR( "out of dynamic memory in zconfensure_buffer_stack()" );
+
+		/* zero only the new slots.*/
+		memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+		(yy_buffer_stack_max) = num_to_alloc;
+	}
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ * 
+ * @return the newly allocated buffer state object. 
+ */
+YY_BUFFER_STATE zconf_scan_buffer  (char * base, yy_size_t  size )
+{
+	YY_BUFFER_STATE b;
+    
+	if ( size < 2 ||
+	     base[size-2] != YY_END_OF_BUFFER_CHAR ||
+	     base[size-1] != YY_END_OF_BUFFER_CHAR )
+		/* They forgot to leave room for the EOB's. */
+		return 0;
+
+	b = (YY_BUFFER_STATE) zconfalloc(sizeof( struct yy_buffer_state )  );
+	if ( ! b )
+		YY_FATAL_ERROR( "out of dynamic memory in zconf_scan_buffer()" );
+
+	b->yy_buf_size = size - 2;	/* "- 2" to take care of EOB's */
+	b->yy_buf_pos = b->yy_ch_buf = base;
+	b->yy_is_our_buffer = 0;
+	b->yy_input_file = 0;
+	b->yy_n_chars = b->yy_buf_size;
+	b->yy_is_interactive = 0;
+	b->yy_at_bol = 1;
+	b->yy_fill_buffer = 0;
+	b->yy_buffer_status = YY_BUFFER_NEW;
+
+	zconf_switch_to_buffer(b  );
+
+	return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to zconflex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ * 
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ *       zconf_scan_bytes() instead.
+ */
+YY_BUFFER_STATE zconf_scan_string (yyconst char * yystr )
+{
+    
+	return zconf_scan_bytes(yystr,strlen(yystr) );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to zconflex() will
+ * scan from a @e copy of @a bytes.
+ * @param bytes the byte buffer to scan
+ * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * 
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE zconf_scan_bytes  (yyconst char * yybytes, int  _yybytes_len )
+{
+	YY_BUFFER_STATE b;
+	char *buf;
+	yy_size_t n;
+	int i;
+    
+	/* Get memory for full buffer, including space for trailing EOB's. */
+	n = _yybytes_len + 2;
+	buf = (char *) zconfalloc(n  );
+	if ( ! buf )
+		YY_FATAL_ERROR( "out of dynamic memory in zconf_scan_bytes()" );
+
+	for ( i = 0; i < _yybytes_len; ++i )
+		buf[i] = yybytes[i];
+
+	buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+	b = zconf_scan_buffer(buf,n );
+	if ( ! b )
+		YY_FATAL_ERROR( "bad buffer in zconf_scan_bytes()" );
+
+	/* It's okay to grow etc. this buffer, and we should throw it
+	 * away when we're done.
+	 */
+	b->yy_is_our_buffer = 1;
+
+	return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yy_fatal_error (yyconst char* msg )
+{
+    	(void) fprintf( stderr, "%s\n", msg );
+	exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+	do \
+		{ \
+		/* Undo effects of setting up zconftext. */ \
+        int yyless_macro_arg = (n); \
+        YY_LESS_LINENO(yyless_macro_arg);\
+		zconftext[zconfleng] = (yy_hold_char); \
+		(yy_c_buf_p) = zconftext + yyless_macro_arg; \
+		(yy_hold_char) = *(yy_c_buf_p); \
+		*(yy_c_buf_p) = '\0'; \
+		zconfleng = yyless_macro_arg; \
+		} \
+	while ( 0 )
+
+/* Accessor  methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ * 
+ */
+int zconfget_lineno  (void)
+{
+        
+    return zconflineno;
+}
+
+/** Get the input stream.
+ * 
+ */
+FILE *zconfget_in  (void)
+{
+        return zconfin;
+}
+
+/** Get the output stream.
+ * 
+ */
+FILE *zconfget_out  (void)
+{
+        return zconfout;
+}
+
+/** Get the length of the current token.
+ * 
+ */
+int zconfget_leng  (void)
+{
+        return zconfleng;
+}
+
+/** Get the current token.
+ * 
+ */
+
+char *zconfget_text  (void)
+{
+        return zconftext;
+}
+
+/** Set the current line number.
+ * @param line_number
+ * 
+ */
+void zconfset_lineno (int  line_number )
+{
+    
+    zconflineno = line_number;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param in_str A readable stream.
+ * 
+ * @see zconf_switch_to_buffer
+ */
+void zconfset_in (FILE *  in_str )
+{
+        zconfin = in_str ;
+}
+
+void zconfset_out (FILE *  out_str )
+{
+        zconfout = out_str ;
+}
+
+int zconfget_debug  (void)
+{
+        return zconf_flex_debug;
+}
+
+void zconfset_debug (int  bdebug )
+{
+        zconf_flex_debug = bdebug ;
+}
+
+static int yy_init_globals (void)
+{
+        /* Initialization is the same as for the non-reentrant scanner.
+     * This function is called from zconflex_destroy(), so don't allocate here.
+     */
+
+    (yy_buffer_stack) = 0;
+    (yy_buffer_stack_top) = 0;
+    (yy_buffer_stack_max) = 0;
+    (yy_c_buf_p) = (char *) 0;
+    (yy_init) = 0;
+    (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+    zconfin = stdin;
+    zconfout = stdout;
+#else
+    zconfin = (FILE *) 0;
+    zconfout = (FILE *) 0;
+#endif
+
+    /* For future reference: Set errno on error, since we are called by
+     * zconflex_init()
+     */
+    return 0;
+}
+
+/* zconflex_destroy is for both reentrant and non-reentrant scanners. */
+int zconflex_destroy  (void)
+{
+    
+    /* Pop the buffer stack, destroying each element. */
+	while(YY_CURRENT_BUFFER){
+		zconf_delete_buffer(YY_CURRENT_BUFFER  );
+		YY_CURRENT_BUFFER_LVALUE = NULL;
+		zconfpop_buffer_state();
+	}
+
+	/* Destroy the stack itself. */
+	zconffree((yy_buffer_stack) );
+	(yy_buffer_stack) = NULL;
+
+    /* Reset the globals. This is important in a non-reentrant scanner so the next time
+     * zconflex() is called, initialization will occur. */
+    yy_init_globals( );
+
+    return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
+{
+	register int i;
+	for ( i = 0; i < n; ++i )
+		s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * s )
+{
+	register int n;
+	for ( n = 0; s[n]; ++n )
+		;
+
+	return n;
+}
+#endif
+
+void *zconfalloc (yy_size_t  size )
+{
+	return (void *) malloc( size );
+}
+
+void *zconfrealloc  (void * ptr, yy_size_t  size )
+{
+	/* The cast to (char *) in the following accommodates both
+	 * implementations that use char* generic pointers, and those
+	 * that use void* generic pointers.  It works with the latter
+	 * because both ANSI C and C++ allow castless assignment from
+	 * any pointer type to void*, and deal with argument conversions
+	 * as though doing an assignment.
+	 */
+	return (void *) realloc( (char *) ptr, size );
+}
+
+void zconffree (void * ptr )
+{
+	free( (char *) ptr );	/* see zconfrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+void zconf_starthelp(void)
+{
+	new_string();
+	last_ts = first_ts = 0;
+	BEGIN(HELP);
+}
+
+static void zconf_endhelp(void)
+{
+	zconflval.string = text;
+	BEGIN(INITIAL);
+}
+
+/*
+ * Try to open the specified file using the following names:
+ * ./name
+ * $(srctree)/name
+ * The latter is used when srctree is separate from objtree
+ * while compiling the kernel.
+ * Return NULL if the file is not found.
+ */
+FILE *zconf_fopen(const char *name)
+{
+	char *env, fullname[PATH_MAX+1];
+	FILE *f;
+
+	f = fopen(name, "r");
+	if (!f && name != NULL && name[0] != '/') {
+		env = getenv(SRCTREE);
+		if (env) {
+			sprintf(fullname, "%s/%s", env, name);
+			f = fopen(fullname, "r");
+		}
+	}
+	return f;
+}
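+
+/*
+ * Illustrative sketch only, not part of the patch: a minimal caller relying
+ * on the lookup order documented above (./name first, then $(srctree)/name
+ * when the srctree environment variable is set). The file name used here is
+ * hypothetical.
+ *
+ *	FILE *f = zconf_fopen("drivers/Kconfig");
+ *	if (f)
+ *		fclose(f);
+ *	else
+ *		perror("drivers/Kconfig");
+ */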
+
+void zconf_initscan(const char *name)
+{
+	zconfin = zconf_fopen(name);
+	if (!zconfin) {
+		printf("can't find file %s\n", name);
+		exit(1);
+	}
+
+	current_buf = xmalloc(sizeof(*current_buf));
+	memset(current_buf, 0, sizeof(*current_buf));
+
+	current_file = file_lookup(name);
+	current_file->lineno = 1;
+}
+
+void zconf_nextfile(const char *name)
+{
+	struct file *iter;
+	struct file *file = file_lookup(name);
+	struct buffer *buf = xmalloc(sizeof(*buf));
+	memset(buf, 0, sizeof(*buf));
+
+	current_buf->state = YY_CURRENT_BUFFER;
+	zconfin = zconf_fopen(file->name);
+	if (!zconfin) {
+		printf("%s:%d: can't open file \"%s\"\n",
+		    zconf_curname(), zconf_lineno(), file->name);
+		exit(1);
+	}
+	zconf_switch_to_buffer(zconf_create_buffer(zconfin,YY_BUF_SIZE));
+	buf->parent = current_buf;
+	current_buf = buf;
+
+	for (iter = current_file->parent; iter; iter = iter->parent ) {
+		if (!strcmp(current_file->name,iter->name) ) {
+			printf("%s:%d: recursive inclusion detected. "
+			       "Inclusion path:\n  current file : '%s'\n",
+			       zconf_curname(), zconf_lineno(),
+			       zconf_curname());
+			iter = current_file->parent;
+			while (iter && \
+			       strcmp(iter->name,current_file->name)) {
+				printf("  included from: '%s:%d'\n",
+				       iter->name, iter->lineno-1);
+				iter = iter->parent;
+			}
+			if (iter)
+				printf("  included from: '%s:%d'\n",
+				       iter->name, iter->lineno+1);
+			exit(1);
+		}
+	}
+	file->lineno = 1;
+	file->parent = current_file;
+	current_file = file;
+}
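+
+/*
+ * Illustrative sketch only, not part of the patch: the ancestor walk above
+ * rejects recursive inclusion, e.g. a hypothetical arch/foo/Kconfig that
+ * contains
+ *
+ *	source "arch/foo/Kconfig"
+ *
+ * which would otherwise be re-entered by zconf_nextfile() without end.
+ */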
+
+static void zconf_endfile(void)
+{
+	struct buffer *parent;
+
+	current_file = current_file->parent;
+
+	parent = current_buf->parent;
+	if (parent) {
+		fclose(zconfin);
+		zconf_delete_buffer(YY_CURRENT_BUFFER);
+		zconf_switch_to_buffer(parent->state);
+	}
+	free(current_buf);
+	current_buf = parent;
+}
+
+int zconf_lineno(void)
+{
+	return current_pos.lineno;
+}
+
+const char *zconf_curname(void)
+{
+	return current_pos.file ? current_pos.file->name : "<none>";
+}
+
diff -Nur linux-4.2/scripts/kconfig/zconf.tab.c linux-4.2-pck/scripts/kconfig/zconf.tab.c
--- linux-4.2/scripts/kconfig/zconf.tab.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/kconfig/zconf.tab.c	2015-09-08 01:26:14.839136029 -0300
@@ -0,0 +1,2580 @@
+/* A Bison parser, made by GNU Bison 2.5.1.  */
+
+/* Bison implementation for Yacc-like parsers in C
+   
+      Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc.
+   
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+   
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+   simplifying the original so-called "semantic" parser.  */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+   infringing on user name space.  This should be done even for local
+   variables, as they might otherwise be expanded by user macros.
+   There are some unavoidable exceptions within include files to
+   define necessary library symbols; they are noted "INFRINGES ON
+   USER NAME SPACE" below.  */
+
+/* Identify Bison output.  */
+#define YYBISON 1
+
+/* Bison version.  */
+#define YYBISON_VERSION "2.5.1"
+
+/* Skeleton name.  */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers.  */
+#define YYPURE 0
+
+/* Push parsers.  */
+#define YYPUSH 0
+
+/* Pull parsers.  */
+#define YYPULL 1
+
+/* Using locations.  */
+#define YYLSP_NEEDED 0
+
+/* Substitute the variable and function names.  */
+#define yyparse         zconfparse
+#define yylex           zconflex
+#define yyerror         zconferror
+#define yylval          zconflval
+#define yychar          zconfchar
+#define yydebug         zconfdebug
+#define yynerrs         zconfnerrs
+
+
+/* Copy the first part of user declarations.  */
+
+
+/*
+ * Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "lkc.h"
+
+#define printd(mask, fmt...) if (cdebug & (mask)) printf(fmt)
+
+#define PRINTD		0x0001
+#define DEBUG_PARSE	0x0002
+
+int cdebug = PRINTD;
+
+extern int zconflex(void);
+static void zconfprint(const char *err, ...);
+static void zconf_error(const char *err, ...);
+static void zconferror(const char *err);
+static bool zconf_endtoken(const struct kconf_id *id, int starttoken, int endtoken);
+
+struct symbol *symbol_hash[SYMBOL_HASHSIZE];
+
+static struct menu *current_menu, *current_entry;
+
+
+
+
+# ifndef YY_NULL
+#  if defined __cplusplus && 201103L <= __cplusplus
+#   define YY_NULL nullptr
+#  else
+#   define YY_NULL 0
+#  endif
+# endif
+
+/* Enabling traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 1
+#endif
+
+/* Enabling verbose error messages.  */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table.  */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+   /* Put the tokens into the symbol table, so that GDB and other debuggers
+      know about them.  */
+   enum yytokentype {
+     T_MAINMENU = 258,
+     T_MENU = 259,
+     T_ENDMENU = 260,
+     T_SOURCE = 261,
+     T_CHOICE = 262,
+     T_ENDCHOICE = 263,
+     T_COMMENT = 264,
+     T_CONFIG = 265,
+     T_MENUCONFIG = 266,
+     T_HELP = 267,
+     T_HELPTEXT = 268,
+     T_IF = 269,
+     T_ENDIF = 270,
+     T_DEPENDS = 271,
+     T_OPTIONAL = 272,
+     T_PROMPT = 273,
+     T_TYPE = 274,
+     T_DEFAULT = 275,
+     T_SELECT = 276,
+     T_RANGE = 277,
+     T_VISIBLE = 278,
+     T_OPTION = 279,
+     T_ON = 280,
+     T_WORD = 281,
+     T_WORD_QUOTE = 282,
+     T_UNEQUAL = 283,
+     T_LESS = 284,
+     T_LESS_EQUAL = 285,
+     T_GREATER = 286,
+     T_GREATER_EQUAL = 287,
+     T_CLOSE_PAREN = 288,
+     T_OPEN_PAREN = 289,
+     T_EOL = 290,
+     T_OR = 291,
+     T_AND = 292,
+     T_EQUAL = 293,
+     T_NOT = 294
+   };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+
+	char *string;
+	struct file *file;
+	struct symbol *symbol;
+	struct expr *expr;
+	struct menu *menu;
+	const struct kconf_id *id;
+
+
+
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations.  */
+
+
+/* Include zconf.hash.c here so it can see the token constants. */
+#include "zconf.hash.c"
+
+
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+#  define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+#  define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+#  include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYSIZE_T size_t
+# else
+#  define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+#  if ENABLE_NLS
+#   include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+#   define YY_(msgid) dgettext ("bison-runtime", msgid)
+#  endif
+# endif
+# ifndef YY_
+#  define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E.  */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions.  */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+    int yyi;
+#endif
+{
+  return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols.  */
+
+# ifdef YYSTACK_USE_ALLOCA
+#  if YYSTACK_USE_ALLOCA
+#   ifdef __GNUC__
+#    define YYSTACK_ALLOC __builtin_alloca
+#   elif defined __BUILTIN_VA_ARG_INCR
+#    include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+#   elif defined _AIX
+#    define YYSTACK_ALLOC __alloca
+#   elif defined _MSC_VER
+#    include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+#    define alloca _alloca
+#   else
+#    define YYSTACK_ALLOC alloca
+#    if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+#     include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+      /* Use EXIT_SUCCESS as a witness for stdlib.h.  */
+#     ifndef EXIT_SUCCESS
+#      define EXIT_SUCCESS 0
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+   /* Pacify GCC's `empty if-body' warning.  */
+#  define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+    /* The OS might guarantee only one guard page at the bottom of the stack,
+       and a page size can be as small as 4096 bytes.  So we cannot safely
+       invoke alloca (N) if N exceeds 4096.  Use a slightly smaller number
+       to allow for a few compiler-allocated temporary stack slots.  */
+#   define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+#  endif
+# else
+#  define YYSTACK_ALLOC YYMALLOC
+#  define YYSTACK_FREE YYFREE
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+#   define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+#  endif
+#  if (defined __cplusplus && ! defined EXIT_SUCCESS \
+       && ! ((defined YYMALLOC || defined malloc) \
+	     && (defined YYFREE || defined free)))
+#   include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#   ifndef EXIT_SUCCESS
+#    define EXIT_SUCCESS 0
+#   endif
+#  endif
+#  ifndef YYMALLOC
+#   define YYMALLOC malloc
+#   if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+#  ifndef YYFREE
+#   define YYFREE free
+#   if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+     && (! defined __cplusplus \
+	 || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member.  */
+union yyalloc
+{
+  yytype_int16 yyss_alloc;
+  YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next.  */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+   N elements.  */
+# define YYSTACK_BYTES(N) \
+     ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+      + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one.  The
+   local variables YYSIZE and YYSTACKSIZE give the old and new number of
+   elements in the stack, and YYPTR gives the new location of the
+   stack.  Advance YYPTR to a properly aligned location for the next
+   stack.  */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack)				\
+    do									\
+      {									\
+	YYSIZE_T yynewbytes;						\
+	YYCOPY (&yyptr->Stack_alloc, Stack, yysize);			\
+	Stack = &yyptr->Stack_alloc;					\
+	yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+	yyptr += yynewbytes / sizeof (*yyptr);				\
+      }									\
+    while (YYID (0))
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from SRC to DST.  The source and destination do
+   not overlap.  */
+# ifndef YYCOPY
+#  if defined __GNUC__ && 1 < __GNUC__
+#   define YYCOPY(Dst, Src, Count) \
+      __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src)))
+#  else
+#   define YYCOPY(Dst, Src, Count)              \
+      do                                        \
+        {                                       \
+          YYSIZE_T yyi;                         \
+          for (yyi = 0; yyi < (Count); yyi++)   \
+            (Dst)[yyi] = (Src)[yyi];            \
+        }                                       \
+      while (YYID (0))
+#  endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state.  */
+#define YYFINAL  11
+/* YYLAST -- Last index in YYTABLE.  */
+#define YYLAST   298
+
+/* YYNTOKENS -- Number of terminals.  */
+#define YYNTOKENS  40
+/* YYNNTS -- Number of nonterminals.  */
+#define YYNNTS  50
+/* YYNRULES -- Number of rules.  */
+#define YYNRULES  122
+/* YYNSTATES -- Number of states.  */
+#define YYNSTATES  199
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
+#define YYUNDEFTOK  2
+#define YYMAXUTOK   294
+
+#define YYTRANSLATE(YYX)						\
+  ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX.  */
+static const yytype_uint8 yytranslate[] =
+{
+       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
+       5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
+      25,    26,    27,    28,    29,    30,    31,    32,    33,    34,
+      35,    36,    37,    38,    39
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+   YYRHS.  */
+static const yytype_uint16 yyprhs[] =
+{
+       0,     0,     3,     6,     8,    11,    13,    14,    17,    20,
+      23,    26,    31,    36,    40,    42,    44,    46,    48,    50,
+      52,    54,    56,    58,    60,    62,    64,    66,    68,    72,
+      75,    79,    82,    86,    89,    90,    93,    96,    99,   102,
+     105,   108,   112,   117,   122,   127,   133,   137,   138,   142,
+     143,   146,   150,   153,   155,   159,   160,   163,   166,   169,
+     172,   175,   180,   184,   187,   192,   193,   196,   200,   202,
+     206,   207,   210,   213,   216,   220,   224,   228,   230,   234,
+     235,   238,   241,   244,   248,   252,   255,   258,   261,   262,
+     265,   268,   271,   276,   277,   280,   283,   286,   287,   290,
+     292,   294,   297,   300,   303,   305,   308,   309,   312,   314,
+     318,   322,   326,   330,   334,   338,   342,   345,   349,   353,
+     355,   357,   358
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS.  */
+static const yytype_int8 yyrhs[] =
+{
+      41,     0,    -1,    85,    42,    -1,    42,    -1,    67,    43,
+      -1,    43,    -1,    -1,    43,    45,    -1,    43,    59,    -1,
+      43,    71,    -1,    43,    84,    -1,    43,    26,     1,    35,
+      -1,    43,    44,     1,    35,    -1,    43,     1,    35,    -1,
+      16,    -1,    18,    -1,    19,    -1,    21,    -1,    17,    -1,
+      22,    -1,    20,    -1,    23,    -1,    35,    -1,    65,    -1,
+      75,    -1,    48,    -1,    50,    -1,    73,    -1,    26,     1,
+      35,    -1,     1,    35,    -1,    10,    26,    35,    -1,    47,
+      51,    -1,    11,    26,    35,    -1,    49,    51,    -1,    -1,
+      51,    52,    -1,    51,    53,    -1,    51,    79,    -1,    51,
+      77,    -1,    51,    46,    -1,    51,    35,    -1,    19,    82,
+      35,    -1,    18,    83,    86,    35,    -1,    20,    87,    86,
+      35,    -1,    21,    26,    86,    35,    -1,    22,    88,    88,
+      86,    35,    -1,    24,    54,    35,    -1,    -1,    54,    26,
+      55,    -1,    -1,    38,    83,    -1,     7,    89,    35,    -1,
+      56,    60,    -1,    84,    -1,    57,    62,    58,    -1,    -1,
+      60,    61,    -1,    60,    79,    -1,    60,    77,    -1,    60,
+      35,    -1,    60,    46,    -1,    18,    83,    86,    35,    -1,
+      19,    82,    35,    -1,    17,    35,    -1,    20,    26,    86,
+      35,    -1,    -1,    62,    45,    -1,    14,    87,    85,    -1,
+      84,    -1,    63,    66,    64,    -1,    -1,    66,    45,    -1,
+      66,    71,    -1,    66,    59,    -1,     3,    83,    85,    -1,
+       4,    83,    35,    -1,    68,    80,    78,    -1,    84,    -1,
+      69,    72,    70,    -1,    -1,    72,    45,    -1,    72,    71,
+      -1,    72,    59,    -1,     6,    83,    35,    -1,     9,    83,
+      35,    -1,    74,    78,    -1,    12,    35,    -1,    76,    13,
+      -1,    -1,    78,    79,    -1,    78,    35,    -1,    78,    46,
+      -1,    16,    25,    87,    35,    -1,    -1,    80,    81,    -1,
+      80,    35,    -1,    23,    86,    -1,    -1,    83,    86,    -1,
+      26,    -1,    27,    -1,     5,    35,    -1,     8,    35,    -1,
+      15,    35,    -1,    35,    -1,    85,    35,    -1,    -1,    14,
+      87,    -1,    88,    -1,    88,    29,    88,    -1,    88,    30,
+      88,    -1,    88,    31,    88,    -1,    88,    32,    88,    -1,
+      88,    38,    88,    -1,    88,    28,    88,    -1,    34,    87,
+      33,    -1,    39,    87,    -1,    87,    36,    87,    -1,    87,
+      37,    87,    -1,    26,    -1,    27,    -1,    -1,    26,    -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
+static const yytype_uint16 yyrline[] =
+{
+       0,   108,   108,   108,   110,   110,   112,   114,   115,   116,
+     117,   118,   119,   123,   127,   127,   127,   127,   127,   127,
+     127,   127,   131,   132,   133,   134,   135,   136,   140,   141,
+     147,   155,   161,   169,   179,   181,   182,   183,   184,   185,
+     186,   189,   197,   203,   213,   219,   225,   228,   230,   241,
+     242,   247,   256,   261,   269,   272,   274,   275,   276,   277,
+     278,   281,   287,   298,   304,   314,   316,   321,   329,   337,
+     340,   342,   343,   344,   349,   356,   363,   368,   376,   379,
+     381,   382,   383,   386,   394,   401,   408,   414,   421,   423,
+     424,   425,   428,   436,   438,   439,   442,   449,   451,   456,
+     457,   460,   461,   462,   466,   467,   470,   471,   474,   475,
+     476,   477,   478,   479,   480,   481,   482,   483,   484,   487,
+     488,   491,   492
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+   First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
+static const char *const yytname[] =
+{
+  "$end", "error", "$undefined", "T_MAINMENU", "T_MENU", "T_ENDMENU",
+  "T_SOURCE", "T_CHOICE", "T_ENDCHOICE", "T_COMMENT", "T_CONFIG",
+  "T_MENUCONFIG", "T_HELP", "T_HELPTEXT", "T_IF", "T_ENDIF", "T_DEPENDS",
+  "T_OPTIONAL", "T_PROMPT", "T_TYPE", "T_DEFAULT", "T_SELECT", "T_RANGE",
+  "T_VISIBLE", "T_OPTION", "T_ON", "T_WORD", "T_WORD_QUOTE", "T_UNEQUAL",
+  "T_LESS", "T_LESS_EQUAL", "T_GREATER", "T_GREATER_EQUAL",
+  "T_CLOSE_PAREN", "T_OPEN_PAREN", "T_EOL", "T_OR", "T_AND", "T_EQUAL",
+  "T_NOT", "$accept", "input", "start", "stmt_list", "option_name",
+  "common_stmt", "option_error", "config_entry_start", "config_stmt",
+  "menuconfig_entry_start", "menuconfig_stmt", "config_option_list",
+  "config_option", "symbol_option", "symbol_option_list",
+  "symbol_option_arg", "choice", "choice_entry", "choice_end",
+  "choice_stmt", "choice_option_list", "choice_option", "choice_block",
+  "if_entry", "if_end", "if_stmt", "if_block", "mainmenu_stmt", "menu",
+  "menu_entry", "menu_end", "menu_stmt", "menu_block", "source_stmt",
+  "comment", "comment_stmt", "help_start", "help", "depends_list",
+  "depends", "visibility_list", "visible", "prompt_stmt_opt", "prompt",
+  "end", "nl", "if_expr", "expr", "symbol", "word_opt", YY_NULL
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+   token YYLEX-NUM.  */
+static const yytype_uint16 yytoknum[] =
+{
+       0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
+     265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
+     275,   276,   277,   278,   279,   280,   281,   282,   283,   284,
+     285,   286,   287,   288,   289,   290,   291,   292,   293,   294
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
+static const yytype_uint8 yyr1[] =
+{
+       0,    40,    41,    41,    42,    42,    43,    43,    43,    43,
+      43,    43,    43,    43,    44,    44,    44,    44,    44,    44,
+      44,    44,    45,    45,    45,    45,    45,    45,    46,    46,
+      47,    48,    49,    50,    51,    51,    51,    51,    51,    51,
+      51,    52,    52,    52,    52,    52,    53,    54,    54,    55,
+      55,    56,    57,    58,    59,    60,    60,    60,    60,    60,
+      60,    61,    61,    61,    61,    62,    62,    63,    64,    65,
+      66,    66,    66,    66,    67,    68,    69,    70,    71,    72,
+      72,    72,    72,    73,    74,    75,    76,    77,    78,    78,
+      78,    78,    79,    80,    80,    80,    81,    82,    82,    83,
+      83,    84,    84,    84,    85,    85,    86,    86,    87,    87,
+      87,    87,    87,    87,    87,    87,    87,    87,    87,    88,
+      88,    89,    89
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
+static const yytype_uint8 yyr2[] =
+{
+       0,     2,     2,     1,     2,     1,     0,     2,     2,     2,
+       2,     4,     4,     3,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     3,     2,
+       3,     2,     3,     2,     0,     2,     2,     2,     2,     2,
+       2,     3,     4,     4,     4,     5,     3,     0,     3,     0,
+       2,     3,     2,     1,     3,     0,     2,     2,     2,     2,
+       2,     4,     3,     2,     4,     0,     2,     3,     1,     3,
+       0,     2,     2,     2,     3,     3,     3,     1,     3,     0,
+       2,     2,     2,     3,     3,     2,     2,     2,     0,     2,
+       2,     2,     4,     0,     2,     2,     2,     0,     2,     1,
+       1,     2,     2,     2,     1,     2,     0,     2,     1,     3,
+       3,     3,     3,     3,     3,     3,     2,     3,     3,     1,
+       1,     0,     1
+};
+
+/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
+   Performed when YYTABLE doesn't specify something else to do.  Zero
+   means the default is an error.  */
+static const yytype_uint8 yydefact[] =
+{
+       6,     0,   104,     0,     3,     0,     6,     6,    99,   100,
+       0,     1,     0,     0,     0,     0,   121,     0,     0,     0,
+       0,     0,     0,    14,    18,    15,    16,    20,    17,    19,
+      21,     0,    22,     0,     7,    34,    25,    34,    26,    55,
+      65,     8,    70,    23,    93,    79,     9,    27,    88,    24,
+      10,     0,   105,     2,    74,    13,     0,   101,     0,   122,
+       0,   102,     0,     0,     0,   119,   120,     0,     0,     0,
+     108,   103,     0,     0,     0,     0,     0,     0,     0,    88,
+       0,     0,    75,    83,    51,    84,    30,    32,     0,   116,
+       0,     0,    67,     0,     0,     0,     0,     0,     0,    11,
+      12,     0,     0,     0,     0,    97,     0,     0,     0,    47,
+       0,    40,    39,    35,    36,     0,    38,    37,     0,     0,
+      97,     0,    59,    60,    56,    58,    57,    66,    54,    53,
+      71,    73,    69,    72,    68,   106,    95,     0,    94,    80,
+      82,    78,    81,    77,    90,    91,    89,   115,   117,   118,
+     114,   109,   110,   111,   112,   113,    29,    86,     0,   106,
+       0,   106,   106,   106,     0,     0,     0,    87,    63,   106,
+       0,   106,     0,    96,     0,     0,    41,    98,     0,     0,
+     106,    49,    46,    28,     0,    62,     0,   107,    92,    42,
+      43,    44,     0,     0,    48,    61,    64,    45,    50
+};
+
+/* YYDEFGOTO[NTERM-NUM].  */
+static const yytype_int16 yydefgoto[] =
+{
+      -1,     3,     4,     5,    33,    34,   112,    35,    36,    37,
+      38,    74,   113,   114,   165,   194,    39,    40,   128,    41,
+      76,   124,    77,    42,   132,    43,    78,     6,    44,    45,
+     141,    46,    80,    47,    48,    49,   115,   116,    81,   117,
+      79,   138,   160,   161,    50,     7,   173,    69,    70,    60
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+   STATE-NUM.  */
+#define YYPACT_NINF -91
+static const yytype_int16 yypact[] =
+{
+      19,    37,   -91,    13,   -91,    79,   -91,    20,   -91,   -91,
+     -16,   -91,    21,    37,    25,    37,    41,    36,    37,    78,
+      83,    31,    56,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   116,   -91,   127,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   147,   -91,   -91,   105,   -91,   109,   -91,   111,   -91,
+     114,   -91,   136,   137,   142,   -91,   -91,    31,    31,    76,
+     254,   -91,   143,   146,    27,   115,   207,   258,   243,   -14,
+     243,   179,   -91,   -91,   -91,   -91,   -91,   -91,    -7,   -91,
+      31,    31,   105,    51,    51,    51,    51,    51,    51,   -91,
+     -91,   156,   168,   181,    37,    37,    31,   178,    51,   -91,
+     206,   -91,   -91,   -91,   -91,   196,   -91,   -91,   175,    37,
+      37,   185,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   214,   -91,   230,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   183,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,    31,   214,
+     194,   214,    45,   214,    51,    26,   195,   -91,   -91,   214,
+     197,   214,    31,   -91,   139,   208,   -91,   -91,   220,   224,
+     214,   222,   -91,   -91,   226,   -91,   227,   123,   -91,   -91,
+     -91,   -91,   235,    37,   -91,   -91,   -91,   -91,   -91
+};
+
+/* YYPGOTO[NTERM-NUM].  */
+static const yytype_int16 yypgoto[] =
+{
+     -91,   -91,   264,   268,   -91,    30,   -65,   -91,   -91,   -91,
+     -91,   238,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -12,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,    -5,   -91,   -91,   -91,   -91,   -91,   200,   209,   -61,
+     -91,   -91,   170,    -1,    65,     0,   118,   -66,   -90,   -91
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
+   positive, shift that token.  If negative, reduce the rule whose
+   number is the opposite.  If YYTABLE_NINF, syntax error.  */
+#define YYTABLE_NINF -86
+static const yytype_int16 yytable[] =
+{
+      10,    88,    89,   150,   151,   152,   153,   154,   155,   135,
+      54,   123,    56,    11,    58,   126,   145,    62,   164,     2,
+     146,   136,     1,     1,   148,   149,   147,   -31,   101,    90,
+      91,   -31,   -31,   -31,   -31,   -31,   -31,   -31,   -31,   102,
+     162,   -31,   -31,   103,   -31,   104,   105,   106,   107,   108,
+     -31,   109,   181,   110,     2,    52,    55,    65,    66,   172,
+      57,   182,   111,     8,     9,    67,   131,    59,   140,    92,
+      68,    61,   145,   133,   180,   142,   146,    65,    66,    -5,
+      12,    90,    91,    13,    14,    15,    16,    17,    18,    19,
+      20,    71,   174,    21,    22,    23,    24,    25,    26,    27,
+      28,    29,    30,   159,    63,    31,   187,   127,   130,    64,
+     139,     2,    90,    91,    32,   -33,   101,    72,   169,   -33,
+     -33,   -33,   -33,   -33,   -33,   -33,   -33,   102,    73,   -33,
+     -33,   103,   -33,   104,   105,   106,   107,   108,   -33,   109,
+      52,   110,   129,   134,    82,   143,    83,    -4,    12,    84,
+     111,    13,    14,    15,    16,    17,    18,    19,    20,    90,
+      91,    21,    22,    23,    24,    25,    26,    27,    28,    29,
+      30,    85,    86,    31,   188,    90,    91,    87,    99,   -85,
+     101,   100,    32,   -85,   -85,   -85,   -85,   -85,   -85,   -85,
+     -85,   156,   198,   -85,   -85,   103,   -85,   -85,   -85,   -85,
+     -85,   -85,   -85,   157,   163,   110,   158,   166,   101,   167,
+     168,   171,   -52,   -52,   144,   -52,   -52,   -52,   -52,   102,
+      91,   -52,   -52,   103,   118,   119,   120,   121,   172,   176,
+     183,   101,   185,   110,   -76,   -76,   -76,   -76,   -76,   -76,
+     -76,   -76,   122,   189,   -76,   -76,   103,    13,    14,    15,
+      16,    17,    18,    19,    20,   190,   110,    21,    22,   191,
+     193,   195,   196,    14,    15,   144,    17,    18,    19,    20,
+     197,    53,    21,    22,    51,    75,   125,   175,    32,   177,
+     178,   179,    93,    94,    95,    96,    97,   184,   137,   186,
+     170,     0,    98,    32,     0,     0,     0,     0,   192
+};
+
+#define yypact_value_is_default(yystate) \
+  ((yystate) == (-91))
+
+#define yytable_value_is_error(yytable_value) \
+  YYID (0)
+
+static const yytype_int16 yycheck[] =
+{
+       1,    67,    68,    93,    94,    95,    96,    97,    98,    23,
+      10,    76,    13,     0,    15,    76,    81,    18,   108,    35,
+      81,    35,     3,     3,    90,    91,    33,     0,     1,    36,
+      37,     4,     5,     6,     7,     8,     9,    10,    11,    12,
+     106,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      23,    24,    26,    26,    35,    35,    35,    26,    27,    14,
+      35,    35,    35,    26,    27,    34,    78,    26,    80,    69,
+      39,    35,   137,    78,   164,    80,   137,    26,    27,     0,
+       1,    36,    37,     4,     5,     6,     7,     8,     9,    10,
+      11,    35,   158,    14,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,   104,    26,    26,   172,    77,    78,    26,
+      80,    35,    36,    37,    35,     0,     1,     1,   119,     4,
+       5,     6,     7,     8,     9,    10,    11,    12,     1,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
+      35,    26,    77,    78,    35,    80,    35,     0,     1,    35,
+      35,     4,     5,     6,     7,     8,     9,    10,    11,    36,
+      37,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      23,    35,    35,    26,    35,    36,    37,    35,    35,     0,
+       1,    35,    35,     4,     5,     6,     7,     8,     9,    10,
+      11,    35,   193,    14,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,    35,    26,    26,    25,     1,     1,    13,
+      35,    26,     5,     6,    35,     8,     9,    10,    11,    12,
+      37,    14,    15,    16,    17,    18,    19,    20,    14,    35,
+      35,     1,    35,    26,     4,     5,     6,     7,     8,     9,
+      10,    11,    35,    35,    14,    15,    16,     4,     5,     6,
+       7,     8,     9,    10,    11,    35,    26,    14,    15,    35,
+      38,    35,    35,     5,     6,    35,     8,     9,    10,    11,
+      35,     7,    14,    15,     6,    37,    76,   159,    35,   161,
+     162,   163,    28,    29,    30,    31,    32,   169,    79,   171,
+     120,    -1,    38,    35,    -1,    -1,    -1,    -1,   180
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+   symbol of state STATE-NUM.  */
+static const yytype_uint8 yystos[] =
+{
+       0,     3,    35,    41,    42,    43,    67,    85,    26,    27,
+      83,     0,     1,     4,     5,     6,     7,     8,     9,    10,
+      11,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      23,    26,    35,    44,    45,    47,    48,    49,    50,    56,
+      57,    59,    63,    65,    68,    69,    71,    73,    74,    75,
+      84,    43,    35,    42,    85,    35,    83,    35,    83,    26,
+      89,    35,    83,    26,    26,    26,    27,    34,    39,    87,
+      88,    35,     1,     1,    51,    51,    60,    62,    66,    80,
+      72,    78,    35,    35,    35,    35,    35,    35,    87,    87,
+      36,    37,    85,    28,    29,    30,    31,    32,    38,    35,
+      35,     1,    12,    16,    18,    19,    20,    21,    22,    24,
+      26,    35,    46,    52,    53,    76,    77,    79,    17,    18,
+      19,    20,    35,    46,    61,    77,    79,    45,    58,    84,
+      45,    59,    64,    71,    84,    23,    35,    78,    81,    45,
+      59,    70,    71,    84,    35,    46,    79,    33,    87,    87,
+      88,    88,    88,    88,    88,    88,    35,    35,    25,    83,
+      82,    83,    87,    26,    88,    54,     1,    13,    35,    83,
+      82,    26,    14,    86,    87,    86,    35,    86,    86,    86,
+      88,    26,    35,    35,    86,    35,    86,    87,    35,    35,
+      35,    35,    86,    38,    55,    35,    35,    35,    83
+};
+
+#define yyerrok		(yyerrstatus = 0)
+#define yyclearin	(yychar = YYEMPTY)
+#define YYEMPTY		(-2)
+#define YYEOF		0
+
+#define YYACCEPT	goto yyacceptlab
+#define YYABORT		goto yyabortlab
+#define YYERROR		goto yyerrorlab
+
+
+/* Like YYERROR except do call yyerror.  This remains here temporarily
+   to ease the transition to the new meaning of YYERROR, for GCC.
+   Once GCC version 2 has supplanted version 1, this can go.  However,
+   YYFAIL appears to be in use.  Nevertheless, it is formally deprecated
+   in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+   discussed.  */
+
+#define YYFAIL		goto yyerrlab
+#if defined YYFAIL
+  /* This is here to suppress warnings from the GCC cpp's
+     -Wunused-macros.  Normally we don't worry about that warning, but
+     some users do, and we want to make it easy for users to remove
+     YYFAIL uses, which will produce warnings from Bison 2.5.  */
+#endif
+
+#define YYRECOVERING()  (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value)                                  \
+do                                                              \
+  if (yychar == YYEMPTY)                                        \
+    {                                                           \
+      yychar = (Token);                                         \
+      yylval = (Value);                                         \
+      YYPOPSTACK (yylen);                                       \
+      yystate = *yyssp;                                         \
+      goto yybackup;                                            \
+    }                                                           \
+  else                                                          \
+    {                                                           \
+      yyerror (YY_("syntax error: cannot back up")); \
+      YYERROR;							\
+    }								\
+while (YYID (0))
+
+
+#define YYTERROR	1
+#define YYERRCODE	256
+
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+   If N is 0, then set CURRENT to the empty location which ends
+   the previous symbol: RHS[0] (always defined).  */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N)				\
+    do									\
+      if (YYID (N))                                                    \
+	{								\
+	  (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;	\
+	  (Current).first_column = YYRHSLOC (Rhs, 1).first_column;	\
+	  (Current).last_line    = YYRHSLOC (Rhs, N).last_line;		\
+	  (Current).last_column  = YYRHSLOC (Rhs, N).last_column;	\
+	}								\
+      else								\
+	{								\
+	  (Current).first_line   = (Current).last_line   =		\
+	    YYRHSLOC (Rhs, 0).last_line;				\
+	  (Current).first_column = (Current).last_column =		\
+	    YYRHSLOC (Rhs, 0).last_column;				\
+	}								\
+    while (YYID (0))
+#endif
+
+
+/* This macro is provided for backward compatibility. */
+
+#ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments.  */
+
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (YYLEX_PARAM)
+#else
+# define YYLEX yylex ()
+#endif
+
+/* Enable debugging if requested.  */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+#  include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args)			\
+do {						\
+  if (yydebug)					\
+    YYFPRINTF Args;				\
+} while (YYID (0))
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)			  \
+do {									  \
+  if (yydebug)								  \
+    {									  \
+      YYFPRINTF (stderr, "%s ", Title);					  \
+      yy_symbol_print (stderr,						  \
+		  Type, Value); \
+      YYFPRINTF (stderr, "\n");						  \
+    }									  \
+} while (YYID (0))
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT.  |
+`--------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep)
+    FILE *yyoutput;
+    int yytype;
+    YYSTYPE const * const yyvaluep;
+#endif
+{
+  FILE *yyo = yyoutput;
+  YYUSE (yyo);
+  if (!yyvaluep)
+    return;
+# ifdef YYPRINT
+  if (yytype < YYNTOKENS)
+    YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+  YYUSE (yyoutput);
+# endif
+  switch (yytype)
+    {
+      default:
+	break;
+    }
+}
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT.  |
+`--------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep)
+    FILE *yyoutput;
+    int yytype;
+    YYSTYPE const * const yyvaluep;
+#endif
+{
+  if (yytype < YYNTOKENS)
+    YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+  else
+    YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+  yy_symbol_value_print (yyoutput, yytype, yyvaluep);
+  YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included).                                                   |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+    yytype_int16 *yybottom;
+    yytype_int16 *yytop;
+#endif
+{
+  YYFPRINTF (stderr, "Stack now");
+  for (; yybottom <= yytop; yybottom++)
+    {
+      int yybot = *yybottom;
+      YYFPRINTF (stderr, " %d", yybot);
+    }
+  YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top)				\
+do {								\
+  if (yydebug)							\
+    yy_stack_print ((Bottom), (Top));				\
+} while (YYID (0))
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced.  |
+`------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule)
+    YYSTYPE *yyvsp;
+    int yyrule;
+#endif
+{
+  int yynrhs = yyr2[yyrule];
+  int yyi;
+  unsigned long int yylno = yyrline[yyrule];
+  YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+	     yyrule - 1, yylno);
+  /* The symbols being reduced.  */
+  for (yyi = 0; yyi < yynrhs; yyi++)
+    {
+      YYFPRINTF (stderr, "   $%d = ", yyi + 1);
+      yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
+		       &(yyvsp[(yyi + 1) - (yynrhs)])
+		       		       );
+      YYFPRINTF (stderr, "\n");
+    }
+}
+
+# define YY_REDUCE_PRINT(Rule)		\
+do {					\
+  if (yydebug)				\
+    yy_reduce_print (yyvsp, Rule); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace.  It is left uninitialized so that
+   multiple parsers can coexist.  */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks.  */
+#ifndef	YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+   if the built-in stack extension method is used).
+
+   Do not make this value too large; the results are undefined if
+   YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+   evaluated with infinite-precision integer arithmetic.  */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+#  if defined __GLIBC__ && defined _STRING_H
+#   define yystrlen strlen
+#  else
+/* Return the length of YYSTR.  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+    const char *yystr;
+#endif
+{
+  YYSIZE_T yylen;
+  for (yylen = 0; yystr[yylen]; yylen++)
+    continue;
+  return yylen;
+}
+#  endif
+# endif
+
+# ifndef yystpcpy
+#  if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+#   define yystpcpy stpcpy
+#  else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+   YYDEST.  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+    char *yydest;
+    const char *yysrc;
+#endif
+{
+  char *yyd = yydest;
+  const char *yys = yysrc;
+
+  while ((*yyd++ = *yys++) != '\0')
+    continue;
+
+  return yyd - 1;
+}
+#  endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+   quotes and backslashes, so that it's suitable for yyerror.  The
+   heuristic is that double-quoting is unnecessary unless the string
+   contains an apostrophe, a comma, or backslash (other than
+   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
+   null, do not copy; instead, return the length of what the result
+   would have been.  */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+  if (*yystr == '"')
+    {
+      YYSIZE_T yyn = 0;
+      char const *yyp = yystr;
+
+      for (;;)
+	switch (*++yyp)
+	  {
+	  case '\'':
+	  case ',':
+	    goto do_not_strip_quotes;
+
+	  case '\\':
+	    if (*++yyp != '\\')
+	      goto do_not_strip_quotes;
+	    /* Fall through.  */
+	  default:
+	    if (yyres)
+	      yyres[yyn] = *yyp;
+	    yyn++;
+	    break;
+
+	  case '"':
+	    if (yyres)
+	      yyres[yyn] = '\0';
+	    return yyn;
+	  }
+    do_not_strip_quotes: ;
+    }
+
+  if (! yyres)
+    return yystrlen (yystr);
+
+  return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+   about the unexpected token YYTOKEN for the state stack whose top is
+   YYSSP.
+
+   Return 0 if *YYMSG was successfully written.  Return 1 if *YYMSG is
+   not large enough to hold the message.  In that case, also set
+   *YYMSG_ALLOC to the required number of bytes.  Return 2 if the
+   required number of bytes is too large to store.  */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+                yytype_int16 *yyssp, int yytoken)
+{
+  YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]);
+  YYSIZE_T yysize = yysize0;
+  YYSIZE_T yysize1;
+  enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+  /* Internationalized format string. */
+  const char *yyformat = YY_NULL;
+  /* Arguments of yyformat. */
+  char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+  /* Number of reported tokens (one for the "unexpected", one per
+     "expected"). */
+  int yycount = 0;
+
+  /* There are many possibilities here to consider:
+     - Assume YYFAIL is not used.  It's too flawed to consider.  See
+       <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
+       for details.  YYERROR is fine as it does not invoke this
+       function.
+     - If this state is a consistent state with a default action, then
+       the only way this function was invoked is if the default action
+       is an error action.  In that case, don't check for expected
+       tokens because there are none.
+     - The only way there can be no lookahead present (in yychar) is if
+       this state is a consistent state with a default action.  Thus,
+       detecting the absence of a lookahead is sufficient to determine
+       that there is no unexpected or expected token to report.  In that
+       case, just report a simple "syntax error".
+     - Don't assume there isn't a lookahead just because this state is a
+       consistent state with a default action.  There might have been a
+       previous inconsistent state, consistent state with a non-default
+       action, or user semantic action that manipulated yychar.
+     - Of course, the expected token list depends on states to have
+       correct lookahead information, and it depends on the parser not
+       to perform extra reductions after fetching a lookahead from the
+       scanner and before detecting a syntax error.  Thus, state merging
+       (from LALR or IELR) and default reductions corrupt the expected
+       token list.  However, the list is correct for canonical LR with
+       one exception: it will still contain any token that will not be
+       accepted due to an error action in a later state.
+  */
+  if (yytoken != YYEMPTY)
+    {
+      int yyn = yypact[*yyssp];
+      yyarg[yycount++] = yytname[yytoken];
+      if (!yypact_value_is_default (yyn))
+        {
+          /* Start YYX at -YYN if negative to avoid negative indexes in
+             YYCHECK.  In other words, skip the first -YYN actions for
+             this state because they are default actions.  */
+          int yyxbegin = yyn < 0 ? -yyn : 0;
+          /* Stay within bounds of both yycheck and yytname.  */
+          int yychecklim = YYLAST - yyn + 1;
+          int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+          int yyx;
+
+          for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+            if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+                && !yytable_value_is_error (yytable[yyx + yyn]))
+              {
+                if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+                  {
+                    yycount = 1;
+                    yysize = yysize0;
+                    break;
+                  }
+                yyarg[yycount++] = yytname[yyx];
+                yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]);
+                if (! (yysize <= yysize1
+                       && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+                  return 2;
+                yysize = yysize1;
+              }
+        }
+    }
+
+  switch (yycount)
+    {
+# define YYCASE_(N, S)                      \
+      case N:                               \
+        yyformat = S;                       \
+      break
+      YYCASE_(0, YY_("syntax error"));
+      YYCASE_(1, YY_("syntax error, unexpected %s"));
+      YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+      YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+      YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+      YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+    }
+
+  yysize1 = yysize + yystrlen (yyformat);
+  if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+    return 2;
+  yysize = yysize1;
+
+  if (*yymsg_alloc < yysize)
+    {
+      *yymsg_alloc = 2 * yysize;
+      if (! (yysize <= *yymsg_alloc
+             && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+        *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+      return 1;
+    }
+
+  /* Avoid sprintf, as that infringes on the user's name space.
+     Don't have undefined behavior even if the translation
+     produced a string with the wrong number of "%s"s.  */
+  {
+    char *yyp = *yymsg;
+    int yyi = 0;
+    while ((*yyp = *yyformat) != '\0')
+      if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+        {
+          yyp += yytnamerr (yyp, yyarg[yyi++]);
+          yyformat += 2;
+        }
+      else
+        {
+          yyp++;
+          yyformat++;
+        }
+  }
+  return 0;
+}
+#endif /* YYERROR_VERBOSE */
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol.  |
+`-----------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep)
+    const char *yymsg;
+    int yytype;
+    YYSTYPE *yyvaluep;
+#endif
+{
+  YYUSE (yyvaluep);
+
+  if (!yymsg)
+    yymsg = "Deleting";
+  YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
+  switch (yytype)
+    {
+      case 57: /* "choice_entry" */
+
+	{
+	fprintf(stderr, "%s:%d: missing end statement for this entry\n",
+		(yyvaluep->menu)->file->name, (yyvaluep->menu)->lineno);
+	if (current_menu == (yyvaluep->menu))
+		menu_end_menu();
+};
+
+	break;
+      case 63: /* "if_entry" */
+
+	{
+	fprintf(stderr, "%s:%d: missing end statement for this entry\n",
+		(yyvaluep->menu)->file->name, (yyvaluep->menu)->lineno);
+	if (current_menu == (yyvaluep->menu))
+		menu_end_menu();
+};
+
+	break;
+      case 69: /* "menu_entry" */
+
+	{
+	fprintf(stderr, "%s:%d: missing end statement for this entry\n",
+		(yyvaluep->menu)->file->name, (yyvaluep->menu)->lineno);
+	if (current_menu == (yyvaluep->menu))
+		menu_end_menu();
+};
+
+	break;
+
+      default:
+	break;
+    }
+}
+
+
+/* Prevent warnings from -Wmissing-prototypes.  */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+/* The lookahead symbol.  */
+int yychar;
+
+/* The semantic value of the lookahead symbol.  */
+YYSTYPE yylval;
+
+/* Number of syntax errors so far.  */
+int yynerrs;
+
+
+/*----------.
+| yyparse.  |
+`----------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+    void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void)
+#else
+int
+yyparse ()
+
+#endif
+#endif
+{
+    int yystate;
+    /* Number of tokens to shift before error messages enabled.  */
+    int yyerrstatus;
+
+    /* The stacks and their tools:
+       `yyss': related to states.
+       `yyvs': related to semantic values.
+
+       Refer to the stacks through separate pointers, to allow yyoverflow
+       to reallocate them elsewhere.  */
+
+    /* The state stack.  */
+    yytype_int16 yyssa[YYINITDEPTH];
+    yytype_int16 *yyss;
+    yytype_int16 *yyssp;
+
+    /* The semantic value stack.  */
+    YYSTYPE yyvsa[YYINITDEPTH];
+    YYSTYPE *yyvs;
+    YYSTYPE *yyvsp;
+
+    YYSIZE_T yystacksize;
+
+  int yyn;
+  int yyresult;
+  /* Lookahead token as an internal (translated) token number.  */
+  int yytoken;
+  /* The variables used to return semantic value and location from the
+     action routines.  */
+  YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+  /* Buffer for error messages, and its allocated size.  */
+  char yymsgbuf[128];
+  char *yymsg = yymsgbuf;
+  YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N)   (yyvsp -= (N), yyssp -= (N))
+
+  /* The number of symbols on the RHS of the reduced rule.
+     Keep to zero when no symbol should be popped.  */
+  int yylen = 0;
+
+  yytoken = 0;
+  yyss = yyssa;
+  yyvs = yyvsa;
+  yystacksize = YYINITDEPTH;
+
+  YYDPRINTF ((stderr, "Starting parse\n"));
+
+  yystate = 0;
+  yyerrstatus = 0;
+  yynerrs = 0;
+  yychar = YYEMPTY; /* Cause a token to be read.  */
+
+  /* Initialize stack pointers.
+     Waste one element of value and location stack
+     so that they stay on the same level as the state stack.
+     The wasted elements are never initialized.  */
+  yyssp = yyss;
+  yyvsp = yyvs;
+
+  goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate.  |
+`------------------------------------------------------------*/
+ yynewstate:
+  /* In all cases, when you get here, the value and location stacks
+     have just been pushed.  So pushing a state here evens the stacks.  */
+  yyssp++;
+
+ yysetstate:
+  *yyssp = yystate;
+
+  if (yyss + yystacksize - 1 <= yyssp)
+    {
+      /* Get the current used size of the three stacks, in elements.  */
+      YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+      {
+	/* Give user a chance to reallocate the stack.  Use copies of
+	   these so that the &'s don't force the real ones into
+	   memory.  */
+	YYSTYPE *yyvs1 = yyvs;
+	yytype_int16 *yyss1 = yyss;
+
+	/* Each stack pointer address is followed by the size of the
+	   data in use in that stack, in bytes.  This used to be a
+	   conditional around just the two extra args, but that might
+	   be undefined if yyoverflow is a macro.  */
+	yyoverflow (YY_("memory exhausted"),
+		    &yyss1, yysize * sizeof (*yyssp),
+		    &yyvs1, yysize * sizeof (*yyvsp),
+		    &yystacksize);
+
+	yyss = yyss1;
+	yyvs = yyvs1;
+      }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+      goto yyexhaustedlab;
+# else
+      /* Extend the stack our own way.  */
+      if (YYMAXDEPTH <= yystacksize)
+	goto yyexhaustedlab;
+      yystacksize *= 2;
+      if (YYMAXDEPTH < yystacksize)
+	yystacksize = YYMAXDEPTH;
+
+      {
+	yytype_int16 *yyss1 = yyss;
+	union yyalloc *yyptr =
+	  (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+	if (! yyptr)
+	  goto yyexhaustedlab;
+	YYSTACK_RELOCATE (yyss_alloc, yyss);
+	YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+#  undef YYSTACK_RELOCATE
+	if (yyss1 != yyssa)
+	  YYSTACK_FREE (yyss1);
+      }
+# endif
+#endif /* no yyoverflow */
+
+      yyssp = yyss + yysize - 1;
+      yyvsp = yyvs + yysize - 1;
+
+      YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+		  (unsigned long int) yystacksize));
+
+      if (yyss + yystacksize - 1 <= yyssp)
+	YYABORT;
+    }
+
+  YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+  if (yystate == YYFINAL)
+    YYACCEPT;
+
+  goto yybackup;
+
+/*-----------.
+| yybackup.  |
+`-----------*/
+yybackup:
+
+  /* Do appropriate processing given the current state.  Read a
+     lookahead token if we need one and don't already have one.  */
+
+  /* First try to decide what to do without reference to lookahead token.  */
+  yyn = yypact[yystate];
+  if (yypact_value_is_default (yyn))
+    goto yydefault;
+
+  /* Not known => get a lookahead token if don't already have one.  */
+
+  /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.  */
+  if (yychar == YYEMPTY)
+    {
+      YYDPRINTF ((stderr, "Reading a token: "));
+      yychar = YYLEX;
+    }
+
+  if (yychar <= YYEOF)
+    {
+      yychar = yytoken = YYEOF;
+      YYDPRINTF ((stderr, "Now at end of input.\n"));
+    }
+  else
+    {
+      yytoken = YYTRANSLATE (yychar);
+      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+    }
+
+  /* If the proper action on seeing token YYTOKEN is to reduce or to
+     detect an error, take that action.  */
+  yyn += yytoken;
+  if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+    goto yydefault;
+  yyn = yytable[yyn];
+  if (yyn <= 0)
+    {
+      if (yytable_value_is_error (yyn))
+        goto yyerrlab;
+      yyn = -yyn;
+      goto yyreduce;
+    }
+
+  /* Count tokens shifted since error; after three, turn off error
+     status.  */
+  if (yyerrstatus)
+    yyerrstatus--;
+
+  /* Shift the lookahead token.  */
+  YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+  /* Discard the shifted token.  */
+  yychar = YYEMPTY;
+
+  yystate = yyn;
+  *++yyvsp = yylval;
+
+  goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state.  |
+`-----------------------------------------------------------*/
+yydefault:
+  yyn = yydefact[yystate];
+  if (yyn == 0)
+    goto yyerrlab;
+  goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction.  |
+`-----------------------------*/
+yyreduce:
+  /* yyn is the number of a rule to reduce with.  */
+  yylen = yyr2[yyn];
+
+  /* If YYLEN is nonzero, implement the default value of the action:
+     `$$ = $1'.
+
+     Otherwise, the following line sets YYVAL to garbage.
+     This behavior is undocumented and Bison
+     users should not rely upon it.  Assigning to YYVAL
+     unconditionally makes the parser a bit smaller, and it avoids a
+     GCC warning that YYVAL may be used uninitialized.  */
+  yyval = yyvsp[1-yylen];
+
+
+  YY_REDUCE_PRINT (yyn);
+  switch (yyn)
+    {
+        case 10:
+
+    { zconf_error("unexpected end statement"); }
+    break;
+
+  case 11:
+
+    { zconf_error("unknown statement \"%s\"", (yyvsp[(2) - (4)].string)); }
+    break;
+
+  case 12:
+
+    {
+	zconf_error("unexpected option \"%s\"", kconf_id_strings + (yyvsp[(2) - (4)].id)->name);
+}
+    break;
+
+  case 13:
+
+    { zconf_error("invalid statement"); }
+    break;
+
+  case 28:
+
+    { zconf_error("unknown option \"%s\"", (yyvsp[(1) - (3)].string)); }
+    break;
+
+  case 29:
+
+    { zconf_error("invalid option"); }
+    break;
+
+  case 30:
+
+    {
+	struct symbol *sym = sym_lookup((yyvsp[(2) - (3)].string), 0);
+	sym->flags |= SYMBOL_OPTIONAL;
+	menu_add_entry(sym);
+	printd(DEBUG_PARSE, "%s:%d:config %s\n", zconf_curname(), zconf_lineno(), (yyvsp[(2) - (3)].string));
+}
+    break;
+
+  case 31:
+
+    {
+	menu_end_entry();
+	printd(DEBUG_PARSE, "%s:%d:endconfig\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 32:
+
+    {
+	struct symbol *sym = sym_lookup((yyvsp[(2) - (3)].string), 0);
+	sym->flags |= SYMBOL_OPTIONAL;
+	menu_add_entry(sym);
+	printd(DEBUG_PARSE, "%s:%d:menuconfig %s\n", zconf_curname(), zconf_lineno(), (yyvsp[(2) - (3)].string));
+}
+    break;
+
+  case 33:
+
+    {
+	if (current_entry->prompt)
+		current_entry->prompt->type = P_MENU;
+	else
+		zconfprint("warning: menuconfig statement without prompt");
+	menu_end_entry();
+	printd(DEBUG_PARSE, "%s:%d:endconfig\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 41:
+
+    {
+	menu_set_type((yyvsp[(1) - (3)].id)->stype);
+	printd(DEBUG_PARSE, "%s:%d:type(%u)\n",
+		zconf_curname(), zconf_lineno(),
+		(yyvsp[(1) - (3)].id)->stype);
+}
+    break;
+
+  case 42:
+
+    {
+	menu_add_prompt(P_PROMPT, (yyvsp[(2) - (4)].string), (yyvsp[(3) - (4)].expr));
+	printd(DEBUG_PARSE, "%s:%d:prompt\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 43:
+
+    {
+	menu_add_expr(P_DEFAULT, (yyvsp[(2) - (4)].expr), (yyvsp[(3) - (4)].expr));
+	if ((yyvsp[(1) - (4)].id)->stype != S_UNKNOWN)
+		menu_set_type((yyvsp[(1) - (4)].id)->stype);
+	printd(DEBUG_PARSE, "%s:%d:default(%u)\n",
+		zconf_curname(), zconf_lineno(),
+		(yyvsp[(1) - (4)].id)->stype);
+}
+    break;
+
+  case 44:
+
+    {
+	menu_add_symbol(P_SELECT, sym_lookup((yyvsp[(2) - (4)].string), 0), (yyvsp[(3) - (4)].expr));
+	printd(DEBUG_PARSE, "%s:%d:select\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 45:
+
+    {
+	menu_add_expr(P_RANGE, expr_alloc_comp(E_RANGE,(yyvsp[(2) - (5)].symbol), (yyvsp[(3) - (5)].symbol)), (yyvsp[(4) - (5)].expr));
+	printd(DEBUG_PARSE, "%s:%d:range\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 48:
+
+    {
+	const struct kconf_id *id = kconf_id_lookup((yyvsp[(2) - (3)].string), strlen((yyvsp[(2) - (3)].string)));
+	if (id && id->flags & TF_OPTION)
+		menu_add_option(id->token, (yyvsp[(3) - (3)].string));
+	else
+		zconfprint("warning: ignoring unknown option %s", (yyvsp[(2) - (3)].string));
+	free((yyvsp[(2) - (3)].string));
+}
+    break;
+
+  case 49:
+
+    { (yyval.string) = NULL; }
+    break;
+
+  case 50:
+
+    { (yyval.string) = (yyvsp[(2) - (2)].string); }
+    break;
+
+  case 51:
+
+    {
+	struct symbol *sym = sym_lookup((yyvsp[(2) - (3)].string), SYMBOL_CHOICE);
+	sym->flags |= SYMBOL_AUTO;
+	menu_add_entry(sym);
+	menu_add_expr(P_CHOICE, NULL, NULL);
+	printd(DEBUG_PARSE, "%s:%d:choice\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 52:
+
+    {
+	(yyval.menu) = menu_add_menu();
+}
+    break;
+
+  case 53:
+
+    {
+	if (zconf_endtoken((yyvsp[(1) - (1)].id), T_CHOICE, T_ENDCHOICE)) {
+		menu_end_menu();
+		printd(DEBUG_PARSE, "%s:%d:endchoice\n", zconf_curname(), zconf_lineno());
+	}
+}
+    break;
+
+  case 61:
+
+    {
+	menu_add_prompt(P_PROMPT, (yyvsp[(2) - (4)].string), (yyvsp[(3) - (4)].expr));
+	printd(DEBUG_PARSE, "%s:%d:prompt\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 62:
+
+    {
+	if ((yyvsp[(1) - (3)].id)->stype == S_BOOLEAN || (yyvsp[(1) - (3)].id)->stype == S_TRISTATE) {
+		menu_set_type((yyvsp[(1) - (3)].id)->stype);
+		printd(DEBUG_PARSE, "%s:%d:type(%u)\n",
+			zconf_curname(), zconf_lineno(),
+			(yyvsp[(1) - (3)].id)->stype);
+	} else
+		YYERROR;
+}
+    break;
+
+  case 63:
+
+    {
+	current_entry->sym->flags |= SYMBOL_OPTIONAL;
+	printd(DEBUG_PARSE, "%s:%d:optional\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 64:
+
+    {
+	if ((yyvsp[(1) - (4)].id)->stype == S_UNKNOWN) {
+		menu_add_symbol(P_DEFAULT, sym_lookup((yyvsp[(2) - (4)].string), 0), (yyvsp[(3) - (4)].expr));
+		printd(DEBUG_PARSE, "%s:%d:default\n",
+			zconf_curname(), zconf_lineno());
+	} else
+		YYERROR;
+}
+    break;
+
+  case 67:
+
+    {
+	printd(DEBUG_PARSE, "%s:%d:if\n", zconf_curname(), zconf_lineno());
+	menu_add_entry(NULL);
+	menu_add_dep((yyvsp[(2) - (3)].expr));
+	(yyval.menu) = menu_add_menu();
+}
+    break;
+
+  case 68:
+
+    {
+	if (zconf_endtoken((yyvsp[(1) - (1)].id), T_IF, T_ENDIF)) {
+		menu_end_menu();
+		printd(DEBUG_PARSE, "%s:%d:endif\n", zconf_curname(), zconf_lineno());
+	}
+}
+    break;
+
+  case 74:
+
+    {
+	menu_add_prompt(P_MENU, (yyvsp[(2) - (3)].string), NULL);
+}
+    break;
+
+  case 75:
+
+    {
+	menu_add_entry(NULL);
+	menu_add_prompt(P_MENU, (yyvsp[(2) - (3)].string), NULL);
+	printd(DEBUG_PARSE, "%s:%d:menu\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 76:
+
+    {
+	(yyval.menu) = menu_add_menu();
+}
+    break;
+
+  case 77:
+
+    {
+	if (zconf_endtoken((yyvsp[(1) - (1)].id), T_MENU, T_ENDMENU)) {
+		menu_end_menu();
+		printd(DEBUG_PARSE, "%s:%d:endmenu\n", zconf_curname(), zconf_lineno());
+	}
+}
+    break;
+
+  case 83:
+
+    {
+	printd(DEBUG_PARSE, "%s:%d:source %s\n", zconf_curname(), zconf_lineno(), (yyvsp[(2) - (3)].string));
+	zconf_nextfile((yyvsp[(2) - (3)].string));
+}
+    break;
+
+  case 84:
+
+    {
+	menu_add_entry(NULL);
+	menu_add_prompt(P_COMMENT, (yyvsp[(2) - (3)].string), NULL);
+	printd(DEBUG_PARSE, "%s:%d:comment\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 85:
+
+    {
+	menu_end_entry();
+}
+    break;
+
+  case 86:
+
+    {
+	printd(DEBUG_PARSE, "%s:%d:help\n", zconf_curname(), zconf_lineno());
+	zconf_starthelp();
+}
+    break;
+
+  case 87:
+
+    {
+	current_entry->help = (yyvsp[(2) - (2)].string);
+}
+    break;
+
+  case 92:
+
+    {
+	menu_add_dep((yyvsp[(3) - (4)].expr));
+	printd(DEBUG_PARSE, "%s:%d:depends on\n", zconf_curname(), zconf_lineno());
+}
+    break;
+
+  case 96:
+
+    {
+	menu_add_visibility((yyvsp[(2) - (2)].expr));
+}
+    break;
+
+  case 98:
+
+    {
+	menu_add_prompt(P_PROMPT, (yyvsp[(1) - (2)].string), (yyvsp[(2) - (2)].expr));
+}
+    break;
+
+  case 101:
+
+    { (yyval.id) = (yyvsp[(1) - (2)].id); }
+    break;
+
+  case 102:
+
+    { (yyval.id) = (yyvsp[(1) - (2)].id); }
+    break;
+
+  case 103:
+
+    { (yyval.id) = (yyvsp[(1) - (2)].id); }
+    break;
+
+  case 106:
+
+    { (yyval.expr) = NULL; }
+    break;
+
+  case 107:
+
+    { (yyval.expr) = (yyvsp[(2) - (2)].expr); }
+    break;
+
+  case 108:
+
+    { (yyval.expr) = expr_alloc_symbol((yyvsp[(1) - (1)].symbol)); }
+    break;
+
+  case 109:
+
+    { (yyval.expr) = expr_alloc_comp(E_LTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 110:
+
+    { (yyval.expr) = expr_alloc_comp(E_LEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 111:
+
+    { (yyval.expr) = expr_alloc_comp(E_GTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 112:
+
+    { (yyval.expr) = expr_alloc_comp(E_GEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 113:
+
+    { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 114:
+
+    { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    break;
+
+  case 115:
+
+    { (yyval.expr) = (yyvsp[(2) - (3)].expr); }
+    break;
+
+  case 116:
+
+    { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); }
+    break;
+
+  case 117:
+
+    { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    break;
+
+  case 118:
+
+    { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    break;
+
+  case 119:
+
+    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); }
+    break;
+
+  case 120:
+
+    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); }
+    break;
+
+  case 121:
+
+    { (yyval.string) = NULL; }
+    break;
+
+
+
+      default: break;
+    }
+  /* User semantic actions sometimes alter yychar, and that requires
+     that yytoken be updated with the new translation.  We take the
+     approach of translating immediately before every use of yytoken.
+     One alternative is translating here after every semantic action,
+     but that translation would be missed if the semantic action invokes
+     YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+     if it invokes YYBACKUP.  In the case of YYABORT or YYACCEPT, an
+     incorrect destructor might then be invoked immediately.  In the
+     case of YYERROR or YYBACKUP, subsequent parser actions might lead
+     to an incorrect destructor call or verbose syntax error message
+     before the lookahead is translated.  */
+  YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+  YYPOPSTACK (yylen);
+  yylen = 0;
+  YY_STACK_PRINT (yyss, yyssp);
+
+  *++yyvsp = yyval;
+
+  /* Now `shift' the result of the reduction.  Determine what state
+     that goes to, based on the state we popped back to and the rule
+     number reduced by.  */
+
+  yyn = yyr1[yyn];
+
+  yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+  if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+    yystate = yytable[yystate];
+  else
+    yystate = yydefgoto[yyn - YYNTOKENS];
+
+  goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+  /* Make sure we have latest lookahead translation.  See comments at
+     user semantic actions for why this is necessary.  */
+  yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
+  /* If not already recovering from an error, report this error.  */
+  if (!yyerrstatus)
+    {
+      ++yynerrs;
+#if ! YYERROR_VERBOSE
+      yyerror (YY_("syntax error"));
+#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+                                        yyssp, yytoken)
+      {
+        char const *yymsgp = YY_("syntax error");
+        int yysyntax_error_status;
+        yysyntax_error_status = YYSYNTAX_ERROR;
+        if (yysyntax_error_status == 0)
+          yymsgp = yymsg;
+        else if (yysyntax_error_status == 1)
+          {
+            if (yymsg != yymsgbuf)
+              YYSTACK_FREE (yymsg);
+            yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+            if (!yymsg)
+              {
+                yymsg = yymsgbuf;
+                yymsg_alloc = sizeof yymsgbuf;
+                yysyntax_error_status = 2;
+              }
+            else
+              {
+                yysyntax_error_status = YYSYNTAX_ERROR;
+                yymsgp = yymsg;
+              }
+          }
+        yyerror (yymsgp);
+        if (yysyntax_error_status == 2)
+          goto yyexhaustedlab;
+      }
+# undef YYSYNTAX_ERROR
+#endif
+    }
+
+
+
+  if (yyerrstatus == 3)
+    {
+      /* If just tried and failed to reuse lookahead token after an
+	 error, discard it.  */
+
+      if (yychar <= YYEOF)
+	{
+	  /* Return failure if at end of input.  */
+	  if (yychar == YYEOF)
+	    YYABORT;
+	}
+      else
+	{
+	  yydestruct ("Error: discarding",
+		      yytoken, &yylval);
+	  yychar = YYEMPTY;
+	}
+    }
+
+  /* Else will try to reuse lookahead token after shifting the error
+     token.  */
+  goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR.  |
+`---------------------------------------------------*/
+yyerrorlab:
+
+  /* Pacify compilers like GCC when the user code never invokes
+     YYERROR and the label yyerrorlab therefore never appears in user
+     code.  */
+  if (/*CONSTCOND*/ 0)
+     goto yyerrorlab;
+
+  /* Do not reclaim the symbols of the rule which action triggered
+     this YYERROR.  */
+  YYPOPSTACK (yylen);
+  yylen = 0;
+  YY_STACK_PRINT (yyss, yyssp);
+  yystate = *yyssp;
+  goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR.  |
+`-------------------------------------------------------------*/
+yyerrlab1:
+  yyerrstatus = 3;	/* Each real token shifted decrements this.  */
+
+  for (;;)
+    {
+      yyn = yypact[yystate];
+      if (!yypact_value_is_default (yyn))
+	{
+	  yyn += YYTERROR;
+	  if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+	    {
+	      yyn = yytable[yyn];
+	      if (0 < yyn)
+		break;
+	    }
+	}
+
+      /* Pop the current state because it cannot handle the error token.  */
+      if (yyssp == yyss)
+	YYABORT;
+
+
+      yydestruct ("Error: popping",
+		  yystos[yystate], yyvsp);
+      YYPOPSTACK (1);
+      yystate = *yyssp;
+      YY_STACK_PRINT (yyss, yyssp);
+    }
+
+  *++yyvsp = yylval;
+
+
+  /* Shift the error token.  */
+  YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+  yystate = yyn;
+  goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here.  |
+`-------------------------------------*/
+yyacceptlab:
+  yyresult = 0;
+  goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here.  |
+`-----------------------------------*/
+yyabortlab:
+  yyresult = 1;
+  goto yyreturn;
+
+#if !defined yyoverflow || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here.  |
+`-------------------------------------------------*/
+yyexhaustedlab:
+  yyerror (YY_("memory exhausted"));
+  yyresult = 2;
+  /* Fall through.  */
+#endif
+
+yyreturn:
+  if (yychar != YYEMPTY)
+    {
+      /* Make sure we have latest lookahead translation.  See comments at
+         user semantic actions for why this is necessary.  */
+      yytoken = YYTRANSLATE (yychar);
+      yydestruct ("Cleanup: discarding lookahead",
+                  yytoken, &yylval);
+    }
+  /* Do not reclaim the symbols of the rule which action triggered
+     this YYABORT or YYACCEPT.  */
+  YYPOPSTACK (yylen);
+  YY_STACK_PRINT (yyss, yyssp);
+  while (yyssp != yyss)
+    {
+      yydestruct ("Cleanup: popping",
+		  yystos[*yyssp], yyvsp);
+      YYPOPSTACK (1);
+    }
+#ifndef yyoverflow
+  if (yyss != yyssa)
+    YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+  if (yymsg != yymsgbuf)
+    YYSTACK_FREE (yymsg);
+#endif
+  /* Make sure YYID is used.  */
+  return YYID (yyresult);
+}
+
+
+
+
+
+void conf_parse(const char *name)
+{
+	struct symbol *sym;
+	int i;
+
+	zconf_initscan(name);
+
+	sym_init();
+	_menu_init();
+	rootmenu.prompt = menu_add_prompt(P_MENU, "Linux Kernel Configuration", NULL);
+
+	if (getenv("ZCONF_DEBUG"))
+		zconfdebug = 1;
+	zconfparse();
+	if (zconfnerrs)
+		exit(1);
+	if (!modules_sym)
+		modules_sym = sym_find( "n" );
+
+	rootmenu.prompt->text = _(rootmenu.prompt->text);
+	rootmenu.prompt->text = sym_expand_string_value(rootmenu.prompt->text);
+
+	menu_finalize(&rootmenu);
+	for_all_symbols(i, sym) {
+		if (sym_check_deps(sym))
+			zconfnerrs++;
+	}
+	if (zconfnerrs)
+		exit(1);
+	sym_set_change_count(1);
+}
+
+static const char *zconf_tokenname(int token)
+{
+	switch (token) {
+	case T_MENU:		return "menu";
+	case T_ENDMENU:		return "endmenu";
+	case T_CHOICE:		return "choice";
+	case T_ENDCHOICE:	return "endchoice";
+	case T_IF:		return "if";
+	case T_ENDIF:		return "endif";
+	case T_DEPENDS:		return "depends";
+	case T_VISIBLE:		return "visible";
+	}
+	return "<token>";
+}
+
+static bool zconf_endtoken(const struct kconf_id *id, int starttoken, int endtoken)
+{
+	if (id->token != endtoken) {
+		zconf_error("unexpected '%s' within %s block",
+			kconf_id_strings + id->name, zconf_tokenname(starttoken));
+		zconfnerrs++;
+		return false;
+	}
+	if (current_menu->file != current_file) {
+		zconf_error("'%s' in different file than '%s'",
+			kconf_id_strings + id->name, zconf_tokenname(starttoken));
+		fprintf(stderr, "%s:%d: location of the '%s'\n",
+			current_menu->file->name, current_menu->lineno,
+			zconf_tokenname(starttoken));
+		zconfnerrs++;
+		return false;
+	}
+	return true;
+}
+
+static void zconfprint(const char *err, ...)
+{
+	va_list ap;
+
+	fprintf(stderr, "%s:%d: ", zconf_curname(), zconf_lineno());
+	va_start(ap, err);
+	vfprintf(stderr, err, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+}
+
+static void zconf_error(const char *err, ...)
+{
+	va_list ap;
+
+	zconfnerrs++;
+	fprintf(stderr, "%s:%d: ", zconf_curname(), zconf_lineno());
+	va_start(ap, err);
+	vfprintf(stderr, err, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+}
+
+static void zconferror(const char *err)
+{
+	fprintf(stderr, "%s:%d: %s\n", zconf_curname(), zconf_lineno() + 1, err);
+}
+
+static void print_quoted_string(FILE *out, const char *str)
+{
+	const char *p;
+	int len;
+
+	putc('"', out);
+	while ((p = strchr(str, '"'))) {
+		len = p - str;
+		if (len)
+			fprintf(out, "%.*s", len, str);
+		fputs("\\\"", out);
+		str = p + 1;
+	}
+	fputs(str, out);
+	putc('"', out);
+}
+
+static void print_symbol(FILE *out, struct menu *menu)
+{
+	struct symbol *sym = menu->sym;
+	struct property *prop;
+
+	if (sym_is_choice(sym))
+		fprintf(out, "\nchoice\n");
+	else
+		fprintf(out, "\nconfig %s\n", sym->name);
+	switch (sym->type) {
+	case S_BOOLEAN:
+		fputs("  boolean\n", out);
+		break;
+	case S_TRISTATE:
+		fputs("  tristate\n", out);
+		break;
+	case S_STRING:
+		fputs("  string\n", out);
+		break;
+	case S_INT:
+		fputs("  integer\n", out);
+		break;
+	case S_HEX:
+		fputs("  hex\n", out);
+		break;
+	default:
+		fputs("  ???\n", out);
+		break;
+	}
+	for (prop = sym->prop; prop; prop = prop->next) {
+		if (prop->menu != menu)
+			continue;
+		switch (prop->type) {
+		case P_PROMPT:
+			fputs("  prompt ", out);
+			print_quoted_string(out, prop->text);
+			if (!expr_is_yes(prop->visible.expr)) {
+				fputs(" if ", out);
+				expr_fprint(prop->visible.expr, out);
+			}
+			fputc('\n', out);
+			break;
+		case P_DEFAULT:
+			fputs( "  default ", out);
+			expr_fprint(prop->expr, out);
+			if (!expr_is_yes(prop->visible.expr)) {
+				fputs(" if ", out);
+				expr_fprint(prop->visible.expr, out);
+			}
+			fputc('\n', out);
+			break;
+		case P_CHOICE:
+			fputs("  #choice value\n", out);
+			break;
+		case P_SELECT:
+			fputs( "  select ", out);
+			expr_fprint(prop->expr, out);
+			fputc('\n', out);
+			break;
+		case P_RANGE:
+			fputs( "  range ", out);
+			expr_fprint(prop->expr, out);
+			fputc('\n', out);
+			break;
+		case P_MENU:
+			fputs( "  menu ", out);
+			print_quoted_string(out, prop->text);
+			fputc('\n', out);
+			break;
+		default:
+			fprintf(out, "  unknown prop %d!\n", prop->type);
+			break;
+		}
+	}
+	if (menu->help) {
+		int len = strlen(menu->help);
+		while (menu->help[--len] == '\n')
+			menu->help[len] = 0;
+		fprintf(out, "  help\n%s\n", menu->help);
+	}
+}
+
+void zconfdump(FILE *out)
+{
+	struct property *prop;
+	struct symbol *sym;
+	struct menu *menu;
+
+	menu = rootmenu.list;
+	while (menu) {
+		if ((sym = menu->sym))
+			print_symbol(out, menu);
+		else if ((prop = menu->prompt)) {
+			switch (prop->type) {
+			case P_COMMENT:
+				fputs("\ncomment ", out);
+				print_quoted_string(out, prop->text);
+				fputs("\n", out);
+				break;
+			case P_MENU:
+				fputs("\nmenu ", out);
+				print_quoted_string(out, prop->text);
+				fputs("\n", out);
+				break;
+			default:
+				;
+			}
+			if (!expr_is_yes(prop->visible.expr)) {
+				fputs("  depends ", out);
+				expr_fprint(prop->visible.expr, out);
+				fputc('\n', out);
+			}
+		}
+
+		if (menu->list)
+			menu = menu->list;
+		else if (menu->next)
+			menu = menu->next;
+		else while ((menu = menu->parent)) {
+			if (menu->prompt && menu->prompt->type == P_MENU)
+				fputs("\nendmenu\n", out);
+			if (menu->next) {
+				menu = menu->next;
+				break;
+			}
+		}
+	}
+}
+
+#include "zconf.lex.c"
+#include "util.c"
+#include "confdata.c"
+#include "expr.c"
+#include "symbol.c"
+#include "menu.c"
+
Binary files linux-4.2/scripts/kconfig/zconf.tab.o and linux-4.2-pck/scripts/kconfig/zconf.tab.o differ
diff -Nur linux-4.2/scripts/mkcompile_h linux-4.2-pck/scripts/mkcompile_h
--- linux-4.2/scripts/mkcompile_h	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/scripts/mkcompile_h	2015-09-08 00:48:22.245029757 -0300
@@ -54,8 +54,8 @@
 fi
 
 UTS_VERSION="#$VERSION"
-CONFIG_FLAGS=""
-if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
+CONFIG_FLAGS="PCK"
+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi
 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
 
diff -Nur linux-4.2/scripts/tuxonice_output_to_csv.sh linux-4.2-pck/scripts/tuxonice_output_to_csv.sh
--- linux-4.2/scripts/tuxonice_output_to_csv.sh	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/scripts/tuxonice_output_to_csv.sh	2015-09-08 11:20:32.583672945 -0300
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+cat $1 | grep "\*TOI\*" | cut -b 22- | sed "s/ /,/g" | sed "s/\.//" | sort -n > $1.tmp
+COLUMNS=$(cat $1.tmp | awk -F ',' ' { print $2 } ' | sort | uniq)
+echo -n "pfn," > $1.tmp2
+for NAME in $COLUMNS; do
+    echo -n "$NAME," >> $1.tmp2
+done
+echo >> $1.tmp2
+FIRST=1
+declare -A data
+while IFS=, read -r pfn column value; do
+    if [ $FIRST -eq 1 ]; then
+        FIRST=0
+        LAST_PFN=$pfn
+    fi
+    if [ $pfn -ne $LAST_PFN ]; then
+        echo -n "$LAST_PFN," >> $1.tmp2;
+        for NAME in $COLUMNS; do
+            echo -n "${data[$NAME]}," >> $1.tmp2
+        done
+        data=( )
+        echo >> $1.tmp2
+        LAST_PFN=$pfn
+    fi
+    if [ -z "$value" ]; then
+        data[$column]=X
+    else
+        data[$column]=$value
+    fi
+done < $1.tmp
+mv $1.tmp2 $1.csv
+rm $1.tmp
+LIBREOFFICE=$(which libreoffice)
+[ -n "$LIBREOFFICE" ] && libreoffice $1.csv &
diff -Nur linux-4.2/security/keys/big_key.c linux-4.2-pck/security/keys/big_key.c
--- linux-4.2/security/keys/big_key.c	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/security/keys/big_key.c	2015-09-08 11:20:32.627004164 -0300
@@ -68,7 +68,7 @@
 		 *
 		 * TODO: Encrypt the stored data with a temporary key.
 		 */
-		file = shmem_kernel_file_setup("", datalen, 0);
+		file = shmem_kernel_file_setup("", datalen, 0, 0);
 		if (IS_ERR(file)) {
 			ret = PTR_ERR(file);
 			goto error;
diff -Nur linux-4.2/tools/testing/selftests/Makefile linux-4.2-pck/tools/testing/selftests/Makefile
--- linux-4.2/tools/testing/selftests/Makefile	2015-08-30 15:34:09.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/Makefile	2015-09-08 00:48:22.245029757 -0300
@@ -6,6 +6,7 @@
 TARGETS += ftrace
 TARGETS += futex
 TARGETS += kcmp
+TARGETS += kdbus
 TARGETS += memfd
 TARGETS += memory-hotplug
 TARGETS += mount
diff -Nur linux-4.2/tools/testing/selftests/kdbus/.gitignore linux-4.2-pck/tools/testing/selftests/kdbus/.gitignore
--- linux-4.2/tools/testing/selftests/kdbus/.gitignore	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/.gitignore	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1 @@
+kdbus-test
diff -Nur linux-4.2/tools/testing/selftests/kdbus/Makefile linux-4.2-pck/tools/testing/selftests/kdbus/Makefile
--- linux-4.2/tools/testing/selftests/kdbus/Makefile	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/Makefile	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,49 @@
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../samples/kdbus/
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -std=gnu99
+CFLAGS += -DKBUILD_MODNAME=\"kdbus\" -D_GNU_SOURCE
+LDLIBS = -pthread -lcap -lm
+
+OBJS= \
+	kdbus-enum.o		\
+	kdbus-util.o		\
+	kdbus-test.o		\
+	kdbus-test.o		\
+	test-activator.o	\
+	test-benchmark.o	\
+	test-bus.o		\
+	test-chat.o		\
+	test-connection.o	\
+	test-daemon.o		\
+	test-endpoint.o		\
+	test-fd.o		\
+	test-free.o		\
+	test-match.o		\
+	test-message.o		\
+	test-metadata-ns.o	\
+	test-monitor.o		\
+	test-names.o		\
+	test-policy.o		\
+	test-policy-ns.o	\
+	test-policy-priv.o	\
+	test-sync.o		\
+	test-timeout.o
+
+all: kdbus-test
+
+include ../lib.mk
+
+%.o: %.c kdbus-enum.h kdbus-test.h kdbus-util.h
+	$(CC) $(CFLAGS) -c $< -o $@
+
+kdbus-test: $(OBJS)
+	$(CC) $(CFLAGS) $^ $(LDLIBS) -o $@
+
+TEST_PROGS := kdbus-test
+
+run_tests:
+	./kdbus-test --tap
+
+clean:
+	rm -f *.o kdbus-test
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-enum.c linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-enum.c
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-enum.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-enum.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+struct kdbus_enum_table {
+	long long id;
+	const char *name;
+};
+
+#define TABLE(what) static struct kdbus_enum_table kdbus_table_##what[]
+#define ENUM(_id) { .id = _id, .name = STRINGIFY(_id) }
+#define LOOKUP(what)							\
+	const char *enum_##what(long long id)				\
+	{								\
+		for (size_t i = 0; i < ELEMENTSOF(kdbus_table_##what); i++) \
+			if (id == kdbus_table_##what[i].id)		\
+				return kdbus_table_##what[i].name;	\
+		return "UNKNOWN";					\
+	}
+
+TABLE(CMD) = {
+	ENUM(KDBUS_CMD_BUS_MAKE),
+	ENUM(KDBUS_CMD_ENDPOINT_MAKE),
+	ENUM(KDBUS_CMD_HELLO),
+	ENUM(KDBUS_CMD_SEND),
+	ENUM(KDBUS_CMD_RECV),
+	ENUM(KDBUS_CMD_LIST),
+	ENUM(KDBUS_CMD_NAME_RELEASE),
+	ENUM(KDBUS_CMD_CONN_INFO),
+	ENUM(KDBUS_CMD_MATCH_ADD),
+	ENUM(KDBUS_CMD_MATCH_REMOVE),
+};
+LOOKUP(CMD);
+
+TABLE(MSG) = {
+	ENUM(_KDBUS_ITEM_NULL),
+	ENUM(KDBUS_ITEM_PAYLOAD_VEC),
+	ENUM(KDBUS_ITEM_PAYLOAD_OFF),
+	ENUM(KDBUS_ITEM_PAYLOAD_MEMFD),
+	ENUM(KDBUS_ITEM_FDS),
+	ENUM(KDBUS_ITEM_BLOOM_PARAMETER),
+	ENUM(KDBUS_ITEM_BLOOM_FILTER),
+	ENUM(KDBUS_ITEM_DST_NAME),
+	ENUM(KDBUS_ITEM_MAKE_NAME),
+	ENUM(KDBUS_ITEM_ATTACH_FLAGS_SEND),
+	ENUM(KDBUS_ITEM_ATTACH_FLAGS_RECV),
+	ENUM(KDBUS_ITEM_ID),
+	ENUM(KDBUS_ITEM_NAME),
+	ENUM(KDBUS_ITEM_TIMESTAMP),
+	ENUM(KDBUS_ITEM_CREDS),
+	ENUM(KDBUS_ITEM_PIDS),
+	ENUM(KDBUS_ITEM_AUXGROUPS),
+	ENUM(KDBUS_ITEM_OWNED_NAME),
+	ENUM(KDBUS_ITEM_TID_COMM),
+	ENUM(KDBUS_ITEM_PID_COMM),
+	ENUM(KDBUS_ITEM_EXE),
+	ENUM(KDBUS_ITEM_CMDLINE),
+	ENUM(KDBUS_ITEM_CGROUP),
+	ENUM(KDBUS_ITEM_CAPS),
+	ENUM(KDBUS_ITEM_SECLABEL),
+	ENUM(KDBUS_ITEM_AUDIT),
+	ENUM(KDBUS_ITEM_CONN_DESCRIPTION),
+	ENUM(KDBUS_ITEM_NAME_ADD),
+	ENUM(KDBUS_ITEM_NAME_REMOVE),
+	ENUM(KDBUS_ITEM_NAME_CHANGE),
+	ENUM(KDBUS_ITEM_ID_ADD),
+	ENUM(KDBUS_ITEM_ID_REMOVE),
+	ENUM(KDBUS_ITEM_REPLY_TIMEOUT),
+	ENUM(KDBUS_ITEM_REPLY_DEAD),
+};
+LOOKUP(MSG);
+
+TABLE(PAYLOAD) = {
+	ENUM(KDBUS_PAYLOAD_KERNEL),
+	ENUM(KDBUS_PAYLOAD_DBUS),
+};
+LOOKUP(PAYLOAD);
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-enum.h linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-enum.h
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-enum.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-enum.h	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#pragma once
+
+const char *enum_CMD(long long id);
+const char *enum_MSG(long long id);
+const char *enum_MATCH(long long id);
+const char *enum_PAYLOAD(long long id);
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-test.c linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-test.c
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-test.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-test.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,905 @@
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/eventfd.h>
+#include <linux/sched.h>
+
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+enum {
+	TEST_CREATE_BUS		= 1 << 0,
+	TEST_CREATE_CONN	= 1 << 1,
+};
+
+struct kdbus_test {
+	const char *name;
+	const char *desc;
+	int (*func)(struct kdbus_test_env *env);
+	unsigned int flags;
+};
+
+struct kdbus_test_args {
+	bool mntns;
+	bool pidns;
+	bool userns;
+	char *uid_map;
+	char *gid_map;
+	int loop;
+	int wait;
+	int fork;
+	int tap_output;
+	char *module;
+	char *root;
+	char *test;
+	char *busname;
+};
+
+static const struct kdbus_test tests[] = {
+	{
+		.name	= "bus-make",
+		.desc	= "bus make functions",
+		.func	= kdbus_test_bus_make,
+		.flags	= 0,
+	},
+	{
+		.name	= "hello",
+		.desc	= "the HELLO command",
+		.func	= kdbus_test_hello,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "byebye",
+		.desc	= "the BYEBYE command",
+		.func	= kdbus_test_byebye,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "chat",
+		.desc	= "a chat pattern",
+		.func	= kdbus_test_chat,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "daemon",
+		.desc	= "a simple daemon",
+		.func	= kdbus_test_daemon,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "fd-passing",
+		.desc	= "file descriptor passing",
+		.func	= kdbus_test_fd_passing,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "endpoint",
+		.desc	= "custom endpoint",
+		.func	= kdbus_test_custom_endpoint,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "monitor",
+		.desc	= "monitor functionality",
+		.func	= kdbus_test_monitor,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "name-basics",
+		.desc	= "basic name registry functions",
+		.func	= kdbus_test_name_basic,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "name-conflict",
+		.desc	= "name registry conflict details",
+		.func	= kdbus_test_name_conflict,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "name-queue",
+		.desc	= "queuing of names",
+		.func	= kdbus_test_name_queue,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "name-takeover",
+		.desc	= "takeover of names",
+		.func	= kdbus_test_name_takeover,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "message-basic",
+		.desc	= "basic message handling",
+		.func	= kdbus_test_message_basic,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "message-prio",
+		.desc	= "handling of messages with priority",
+		.func	= kdbus_test_message_prio,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "message-quota",
+		.desc	= "message quotas are enforced",
+		.func	= kdbus_test_message_quota,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "memory-access",
+		.desc	= "memory access",
+		.func	= kdbus_test_memory_access,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "timeout",
+		.desc	= "timeout",
+		.func	= kdbus_test_timeout,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "sync-byebye",
+		.desc	= "synchronous replies vs. BYEBYE",
+		.func	= kdbus_test_sync_byebye,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "sync-reply",
+		.desc	= "synchronous replies",
+		.func	= kdbus_test_sync_reply,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "message-free",
+		.desc	= "freeing of memory",
+		.func	= kdbus_test_free,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "connection-info",
+		.desc	= "retrieving connection information",
+		.func	= kdbus_test_conn_info,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "connection-update",
+		.desc	= "updating connection information",
+		.func	= kdbus_test_conn_update,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "writable-pool",
+		.desc	= "verifying pools are never writable",
+		.func	= kdbus_test_writable_pool,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "policy",
+		.desc	= "policy",
+		.func	= kdbus_test_policy,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "policy-priv",
+		.desc	= "unprivileged bus access",
+		.func	= kdbus_test_policy_priv,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "policy-ns",
+		.desc	= "policy in user namespaces",
+		.func	= kdbus_test_policy_ns,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "metadata-ns",
+		.desc	= "metadata in different namespaces",
+		.func	= kdbus_test_metadata_ns,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-id-add",
+		.desc	= "adding of matches by id",
+		.func	= kdbus_test_match_id_add,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-id-remove",
+		.desc	= "removing of matches by id",
+		.func	= kdbus_test_match_id_remove,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-replace",
+		.desc	= "replace of matches with the same cookie",
+		.func	= kdbus_test_match_replace,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-name-add",
+		.desc	= "adding of matches by name",
+		.func	= kdbus_test_match_name_add,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-name-remove",
+		.desc	= "removing of matches by name",
+		.func	= kdbus_test_match_name_remove,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-name-change",
+		.desc	= "matching for name changes",
+		.func	= kdbus_test_match_name_change,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "match-bloom",
+		.desc	= "matching with bloom filters",
+		.func	= kdbus_test_match_bloom,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "activator",
+		.desc	= "activator connections",
+		.func	= kdbus_test_activator,
+		.flags	= TEST_CREATE_BUS | TEST_CREATE_CONN,
+	},
+	{
+		.name	= "benchmark",
+		.desc	= "benchmark",
+		.func	= kdbus_test_benchmark,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "benchmark-nomemfds",
+		.desc	= "benchmark without using memfds",
+		.func	= kdbus_test_benchmark_nomemfds,
+		.flags	= TEST_CREATE_BUS,
+	},
+	{
+		.name	= "benchmark-uds",
+		.desc	= "benchmark comparison to UDS",
+		.func	= kdbus_test_benchmark_uds,
+		.flags	= TEST_CREATE_BUS,
+	},
+};
+
+#define N_TESTS ((int) (sizeof(tests) / sizeof(tests[0])))
+
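+/*
+ * Prepare the per-test environment: open the control node of the kdbus
+ * filesystem, create a fresh bus (named after --bus, or a generated
+ * unique name) and, if the test's flags request it, open an initial
+ * connection to that bus.
+ */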
+static int test_prepare_env(const struct kdbus_test *t,
+			    const struct kdbus_test_args *args,
+			    struct kdbus_test_env *env)
+{
+	if (t->flags & TEST_CREATE_BUS) {
+		char *s;
+		char *n = NULL;
+		int ret;
+
+		asprintf(&s, "%s/control", args->root);
+
+		env->control_fd = open(s, O_RDWR);
+		free(s);
+		ASSERT_RETURN(env->control_fd >= 0);
+
+		if (!args->busname) {
+			n = unique_name("test-bus");
+			ASSERT_RETURN(n);
+		}
+
+		ret = kdbus_create_bus(env->control_fd,
+				       args->busname ?: n,
+				       _KDBUS_ATTACH_ALL, &s);
+		free(n);
+		ASSERT_RETURN(ret == 0);
+
+		asprintf(&env->buspath, "%s/%s/bus", args->root, s);
+		free(s);
+	}
+
+	if (t->flags & TEST_CREATE_CONN) {
+		env->conn = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_RETURN(env->conn);
+	}
+
+	env->root = args->root;
+	env->module = args->module;
+
+	return 0;
+}
+
+void test_unprepare_env(const struct kdbus_test *t, struct kdbus_test_env *env)
+{
+	if (env->conn) {
+		kdbus_conn_free(env->conn);
+		env->conn = NULL;
+	}
+
+	if (env->control_fd >= 0) {
+		close(env->control_fd);
+		env->control_fd = -1;
+	}
+
+	if (env->buspath) {
+		free(env->buspath);
+		env->buspath = NULL;
+	}
+}
+
+static int test_run(const struct kdbus_test *t,
+		    const struct kdbus_test_args *kdbus_args,
+		    int wait)
+{
+	int ret;
+	struct kdbus_test_env env = {};
+
+	ret = test_prepare_env(t, kdbus_args, &env);
+	if (ret != TEST_OK)
+		return ret;
+
+	if (wait > 0) {
+		printf("Sleeping %d seconds before running test ...\n", wait);
+		sleep(wait);
+	}
+
+	ret = t->func(&env);
+	test_unprepare_env(t, &env);
+	return ret;
+}
+
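+/*
+ * Run a single test in a forked child so a crashing test cannot take
+ * down the test runner; the child's exit status carries the
+ * TEST_OK/TEST_SKIP/TEST_ERR result back to the parent.
+ */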
+static int test_run_forked(const struct kdbus_test *t,
+			   const struct kdbus_test_args *kdbus_args,
+			   int wait)
+{
+	int ret;
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0) {
+		return TEST_ERR;
+	} else if (pid == 0) {
+		ret = test_run(t, kdbus_args, wait);
+		_exit(ret);
+	}
+
+	pid = waitpid(pid, &ret, 0);
+	if (pid <= 0)
+		return TEST_ERR;
+	else if (!WIFEXITED(ret))
+		return TEST_ERR;
+	else
+		return WEXITSTATUS(ret);
+}
+
+static void print_test_result(int ret)
+{
+	switch (ret) {
+	case TEST_OK:
+		printf("OK");
+		break;
+	case TEST_SKIP:
+		printf("SKIPPED");
+		break;
+	case TEST_ERR:
+		printf("ERROR");
+		break;
+	}
+}
+
+static int start_all_tests(struct kdbus_test_args *kdbus_args)
+{
+	int ret;
+	unsigned int fail_cnt = 0;
+	unsigned int skip_cnt = 0;
+	unsigned int ok_cnt = 0;
+	unsigned int i;
+
+	if (kdbus_args->tap_output) {
+		printf("1..%d\n", N_TESTS);
+		fflush(stdout);
+	}
+
+	kdbus_util_verbose = false;
+
+	for (i = 0; i < N_TESTS; i++) {
+		const struct kdbus_test *t = tests + i;
+
+		if (!kdbus_args->tap_output) {
+			unsigned int n;
+
+			printf("Testing %s (%s) ", t->desc, t->name);
+			for (n = 0; n < 60 - strlen(t->desc) - strlen(t->name); n++)
+				printf(".");
+			printf(" ");
+		}
+
+		ret = test_run_forked(t, kdbus_args, 0);
+		switch (ret) {
+		case TEST_OK:
+			ok_cnt++;
+			break;
+		case TEST_SKIP:
+			skip_cnt++;
+			break;
+		case TEST_ERR:
+			fail_cnt++;
+			break;
+		}
+
+		if (kdbus_args->tap_output) {
+			printf("%sok %d - %s%s (%s)\n",
+			       (ret == TEST_ERR) ? "not " : "", i + 1,
+			       (ret == TEST_SKIP) ? "# SKIP " : "",
+			       t->desc, t->name);
+			fflush(stdout);
+		} else {
+			print_test_result(ret);
+			printf("\n");
+		}
+	}
+
+	if (kdbus_args->tap_output)
+		printf("Failed %d/%d tests, %.2f%% okay\n", fail_cnt, N_TESTS,
+		       100.0 - (fail_cnt * 100.0) / ((float) N_TESTS));
+	else
+		printf("\nSUMMARY: %u tests passed, %u skipped, %u failed\n",
+		       ok_cnt, skip_cnt, fail_cnt);
+
+	return fail_cnt > 0 ? TEST_ERR : TEST_OK;
+}
+
+static int start_one_test(struct kdbus_test_args *kdbus_args)
+{
+	int i, ret;
+	bool test_found = false;
+
+	for (i = 0; i < N_TESTS; i++) {
+		const struct kdbus_test *t = tests + i;
+
+		if (strcmp(t->name, kdbus_args->test))
+			continue;
+
+		do {
+			test_found = true;
+			if (kdbus_args->fork)
+				ret = test_run_forked(t, kdbus_args,
+						      kdbus_args->wait);
+			else
+				ret = test_run(t, kdbus_args,
+					       kdbus_args->wait);
+
+			printf("Testing %s: ", t->desc);
+			print_test_result(ret);
+			printf("\n");
+
+			if (ret != TEST_OK)
+				break;
+		} while (kdbus_args->loop);
+
+		return ret;
+	}
+
+	if (!test_found) {
+		printf("Unknown test-id '%s'\n", kdbus_args->test);
+		return TEST_ERR;
+	}
+
+	return TEST_OK;
+}
+
+static void usage(const char *argv0)
+{
+	unsigned int i, j;
+
+	printf("Usage: %s [options]\n"
+	       "Options:\n"
+	       "\t-a, --tap		Output test results in TAP format\n"
+	       "\t-m, --module <module>	Kdbus module name\n"
+	       "\t-x, --loop		Run in a loop\n"
+	       "\t-f, --fork		Fork before running a test\n"
+	       "\t-h, --help		Print this help\n"
+	       "\t-r, --root <root>	Toplevel of the kdbus hierarchy\n"
+	       "\t-t, --test <test-id>	Run one specific test only, in verbose mode\n"
+	       "\t-b, --bus <busname>	Instead of generating a random bus name, take <busname>.\n"
+	       "\t-w, --wait <secs>	Wait <secs> before actually starting test\n"
+	       "\t    --mntns		New mount namespace\n"
+	       "\t    --pidns		New PID namespace\n"
+	       "\t    --userns		New user namespace\n"
+	       "\t    --uidmap uid_map	UID map for user namespace\n"
+	       "\t    --gidmap gid_map	GID map for user namespace\n"
+	       "\n", argv0);
+
+	printf("By default, all test are run once, and a summary is printed.\n"
+	       "Available tests for --test:\n\n");
+
+	for (i = 0; i < N_TESTS; i++) {
+		const struct kdbus_test *t = tests + i;
+
+		printf("\t%s", t->name);
+
+		for (j = 0; j < 24 - strlen(t->name); j++)
+			printf(" ");
+
+		printf("Test %s\n", t->desc);
+	}
+
+	printf("\n");
+	printf("Note that some tests may, if run specifically by --test, "
+	       "behave differently, and not terminate by themselves.\n");
+
+	exit(EXIT_FAILURE);
+}
+
+void print_kdbus_test_args(struct kdbus_test_args *args)
+{
+	if (args->userns || args->pidns || args->mntns)
+		printf("# Starting tests in new %s%s%s namespaces%s\n",
+			args->mntns ? "MOUNT " : "",
+			args->pidns ? "PID " : "",
+			args->userns ? "USER " : "",
+			args->mntns ? ", kdbusfs will be remounted" : "");
+	else
+		printf("# Starting tests in the same namespaces\n");
+}
+
+void print_metadata_support(void)
+{
+	bool no_meta_audit, no_meta_cgroups, no_meta_seclabel;
+
+	/*
+	 * KDBUS_ATTACH_CGROUP, KDBUS_ATTACH_AUDIT and
+	 * KDBUS_ATTACH_SECLABEL
+	 */
+	no_meta_audit = !config_auditsyscall_is_enabled();
+	no_meta_cgroups = !config_cgroups_is_enabled();
+	no_meta_seclabel = !config_security_is_enabled();
+
+	if (no_meta_audit | no_meta_cgroups | no_meta_seclabel)
+		printf("# Starting tests without %s%s%s metadata support\n",
+		       no_meta_audit ? "AUDIT " : "",
+		       no_meta_cgroups ? "CGROUP " : "",
+		       no_meta_seclabel ? "SECLABEL " : "");
+	else
+		printf("# Starting tests with full metadata support\n");
+}
+
+int run_tests(struct kdbus_test_args *kdbus_args)
+{
+	int ret;
+	static char control[4096];
+
+	snprintf(control, sizeof(control), "%s/control", kdbus_args->root);
+
+	if (access(control, W_OK) < 0) {
+		printf("Unable to locate control node at '%s'.\n",
+			control);
+		return TEST_ERR;
+	}
+
+	if (kdbus_args->test) {
+		ret = start_one_test(kdbus_args);
+	} else {
+		do {
+			ret = start_all_tests(kdbus_args);
+			if (ret != TEST_OK)
+				break;
+		} while (kdbus_args->loop);
+	}
+
+	return ret;
+}
+
+static void nop_handler(int sig) {}
+
+static int test_prepare_mounts(struct kdbus_test_args *kdbus_args)
+{
+	int ret;
+	char kdbusfs[64] = {'\0'};
+
+	snprintf(kdbusfs, sizeof(kdbusfs), "%sfs", kdbus_args->module);
+
+	/* make the whole mount tree a slave so our mounts do not propagate */
+	ret = mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
+	if (ret < 0) {
+		ret = -errno;
+		printf("error mount() root: %d (%m)\n", ret);
+		return ret;
+	}
+
+	/* Remount procfs since we need it in our tests */
+	if (kdbus_args->pidns) {
+		ret = mount("proc", "/proc", "proc",
+			    MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+		if (ret < 0) {
+			ret = -errno;
+			printf("error mount() /proc : %d (%m)\n", ret);
+			return ret;
+		}
+	}
+
+	/* Remount kdbusfs */
+	ret = mount(kdbusfs, kdbus_args->root, kdbusfs,
+		    MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+	if (ret < 0) {
+		ret = -errno;
+		printf("error mount() %s :%d (%m)\n", kdbusfs, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
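+/*
+ * Clone a child into the requested namespaces and run the tests there.
+ * An eventfd holds the child back until the parent has written the
+ * uid/gid mappings; the child then remounts procfs/kdbusfs if a mount
+ * namespace was requested and calls run_tests().
+ */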
+int run_tests_in_namespaces(struct kdbus_test_args *kdbus_args)
+{
+	int ret;
+	int efd = -1;
+	int status;
+	pid_t pid, rpid;
+	struct sigaction oldsa;
+	struct sigaction sa = {
+		.sa_handler = nop_handler,
+		.sa_flags = SA_NOCLDSTOP,
+	};
+
+	efd = eventfd(0, EFD_CLOEXEC);
+	if (efd < 0) {
+		ret = -errno;
+		printf("eventfd() failed: %d (%m)\n", ret);
+		return TEST_ERR;
+	}
+
+	ret = sigaction(SIGCHLD, &sa, &oldsa);
+	if (ret < 0) {
+		ret = -errno;
+		printf("sigaction() failed: %d (%m)\n", ret);
+		return TEST_ERR;
+	}
+
+	/* setup namespaces */
+	pid = syscall(__NR_clone, SIGCHLD|
+		      (kdbus_args->userns ? CLONE_NEWUSER : 0) |
+		      (kdbus_args->mntns ? CLONE_NEWNS : 0) |
+		      (kdbus_args->pidns ? CLONE_NEWPID : 0), NULL);
+	if (pid < 0) {
+		printf("clone() failed: %d (%m)\n", -errno);
+		return TEST_ERR;
+	}
+
+	if (pid == 0) {
+		eventfd_t event_status = 0;
+
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (ret < 0) {
+			ret = -errno;
+			printf("error prctl(): %d (%m)\n", ret);
+			_exit(TEST_ERR);
+		}
+
+		/* reset the child's signal handlers */
+		ret = sigaction(SIGCHLD, &oldsa, NULL);
+		if (ret < 0) {
+			ret = -errno;
+			printf("sigaction() failed: %d (%m)\n", ret);
+			_exit(TEST_ERR);
+		}
+
+		ret = eventfd_read(efd, &event_status);
+		if (ret < 0 || event_status != 1) {
+			printf("error eventfd_read()\n");
+			_exit(TEST_ERR);
+		}
+
+		if (kdbus_args->mntns) {
+			ret = test_prepare_mounts(kdbus_args);
+			if (ret < 0) {
+				printf("error preparing mounts\n");
+				_exit(TEST_ERR);
+			}
+		}
+
+		ret = run_tests(kdbus_args);
+		_exit(ret);
+	}
+
+	/* Setup userns mapping */
+	if (kdbus_args->userns) {
+		ret = userns_map_uid_gid(pid, kdbus_args->uid_map,
+					 kdbus_args->gid_map);
+		if (ret < 0) {
+			printf("error mapping uid and gid in userns\n");
+			eventfd_write(efd, 2);
+			return TEST_ERR;
+		}
+	}
+
+	ret = eventfd_write(efd, 1);
+	if (ret < 0) {
+		ret = -errno;
+		printf("error eventfd_write(): %d (%m)\n", ret);
+		return TEST_ERR;
+	}
+
+	rpid = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(rpid == pid, TEST_ERR);
+
+	close(efd);
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		return TEST_ERR;
+
+	return TEST_OK;
+}
+
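+/*
+ * Validate the namespace options, fill in defaults for the module name
+ * and the kdbusfs mount point, and dispatch to run_tests() either
+ * directly or inside freshly created namespaces.
+ */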
+int start_tests(struct kdbus_test_args *kdbus_args)
+{
+	int ret;
+	bool namespaces;
+	static char fspath[4096];
+
+	namespaces = (kdbus_args->mntns || kdbus_args->pidns ||
+		      kdbus_args->userns);
+
+	/* for pidns we need mntns set */
+	if (kdbus_args->pidns && !kdbus_args->mntns) {
+		printf("Failed: please set both pid and mnt namesapces\n");
+		return TEST_ERR;
+	}
+
+	if (kdbus_args->userns) {
+		if (!config_user_ns_is_enabled()) {
+			printf("User namespace not supported\n");
+			return TEST_ERR;
+		}
+
+		if (!kdbus_args->uid_map || !kdbus_args->gid_map) {
+			printf("Failed: please specify uid or gid mapping\n");
+			return TEST_ERR;
+		}
+	}
+
+	print_kdbus_test_args(kdbus_args);
+	print_metadata_support();
+
+	/* setup kdbus paths */
+	if (!kdbus_args->module)
+		kdbus_args->module = "kdbus";
+
+	if (!kdbus_args->root) {
+		snprintf(fspath, sizeof(fspath), "/sys/fs/%s",
+			 kdbus_args->module);
+		kdbus_args->root = fspath;
+	}
+
+	/* Start tests */
+	if (namespaces)
+		ret = run_tests_in_namespaces(kdbus_args);
+	else
+		ret = run_tests(kdbus_args);
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int t, ret = 0;
+	struct kdbus_test_args *kdbus_args;
+	enum {
+		ARG_MNTNS = 0x100,
+		ARG_PIDNS,
+		ARG_USERNS,
+		ARG_UIDMAP,
+		ARG_GIDMAP,
+	};
+
+	kdbus_args = malloc(sizeof(*kdbus_args));
+	if (!kdbus_args) {
+		printf("unable to malloc() kdbus_args\n");
+		return EXIT_FAILURE;
+	}
+
+	memset(kdbus_args, 0, sizeof(*kdbus_args));
+
+	static const struct option options[] = {
+		{ "loop",	no_argument,		NULL, 'x' },
+		{ "help",	no_argument,		NULL, 'h' },
+		{ "root",	required_argument,	NULL, 'r' },
+		{ "test",	required_argument,	NULL, 't' },
+		{ "bus",	required_argument,	NULL, 'b' },
+		{ "wait",	required_argument,	NULL, 'w' },
+		{ "fork",	no_argument,		NULL, 'f' },
+		{ "module",	required_argument,	NULL, 'm' },
+		{ "tap",	no_argument,		NULL, 'a' },
+		{ "mntns",	no_argument,		NULL, ARG_MNTNS },
+		{ "pidns",	no_argument,		NULL, ARG_PIDNS },
+		{ "userns",	no_argument,		NULL, ARG_USERNS },
+		{ "uidmap",	required_argument,	NULL, ARG_UIDMAP },
+		{ "gidmap",	required_argument,	NULL, ARG_GIDMAP },
+		{}
+	};
+
+	srand(time(NULL));
+
+	while ((t = getopt_long(argc, argv, "hxfm:r:t:b:w:a", options, NULL)) >= 0) {
+		switch (t) {
+		case 'x':
+			kdbus_args->loop = 1;
+			break;
+
+		case 'm':
+			kdbus_args->module = optarg;
+			break;
+
+		case 'r':
+			kdbus_args->root = optarg;
+			break;
+
+		case 't':
+			kdbus_args->test = optarg;
+			break;
+
+		case 'b':
+			kdbus_args->busname = optarg;
+			break;
+
+		case 'w':
+			kdbus_args->wait = strtol(optarg, NULL, 10);
+			break;
+
+		case 'f':
+			kdbus_args->fork = 1;
+			break;
+
+		case 'a':
+			kdbus_args->tap_output = 1;
+			break;
+
+		case ARG_MNTNS:
+			kdbus_args->mntns = true;
+			break;
+
+		case ARG_PIDNS:
+			kdbus_args->pidns = true;
+			break;
+
+		case ARG_USERNS:
+			kdbus_args->userns = true;
+			break;
+
+		case ARG_UIDMAP:
+			kdbus_args->uid_map = optarg;
+			break;
+
+		case ARG_GIDMAP:
+			kdbus_args->gid_map = optarg;
+			break;
+
+		default:
+		case 'h':
+			usage(argv[0]);
+		}
+	}
+
+	ret = start_tests(kdbus_args);
+	if (ret == TEST_ERR)
+		return EXIT_FAILURE;
+
+	free(kdbus_args);
+
+	return 0;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-test.h linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-test.h
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-test.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-test.h	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,84 @@
+#ifndef _TEST_KDBUS_H_
+#define _TEST_KDBUS_H_
+
+struct kdbus_test_env {
+	char *buspath;
+	const char *root;
+	const char *module;
+	int control_fd;
+	struct kdbus_conn *conn;
+};
+
+enum {
+	TEST_OK,
+	TEST_SKIP,
+	TEST_ERR,
+};
+
+#define ASSERT_RETURN_VAL(cond, val)		\
+	if (!(cond)) {			\
+		fprintf(stderr,	"Assertion '%s' failed in %s(), %s:%d\n", \
+			#cond, __func__, __FILE__, __LINE__);	\
+		return val;	\
+	}
+
+#define ASSERT_EXIT_VAL(cond, val)		\
+	if (!(cond)) {			\
+		fprintf(stderr, "Assertion '%s' failed in %s(), %s:%d\n", \
+			#cond, __func__, __FILE__, __LINE__);	\
+		_exit(val);	\
+	}
+
+#define ASSERT_BREAK(cond)		\
+	if (!(cond)) {			\
+		fprintf(stderr, "Assertion '%s' failed in %s(), %s:%d\n", \
+			#cond, __func__, __FILE__, __LINE__);	\
+		break; \
+	}
+
+#define ASSERT_RETURN(cond)		\
+	ASSERT_RETURN_VAL(cond, TEST_ERR)
+
+#define ASSERT_EXIT(cond)		\
+	ASSERT_EXIT_VAL(cond, EXIT_FAILURE)
+
+int kdbus_test_activator(struct kdbus_test_env *env);
+int kdbus_test_benchmark(struct kdbus_test_env *env);
+int kdbus_test_benchmark_nomemfds(struct kdbus_test_env *env);
+int kdbus_test_benchmark_uds(struct kdbus_test_env *env);
+int kdbus_test_bus_make(struct kdbus_test_env *env);
+int kdbus_test_byebye(struct kdbus_test_env *env);
+int kdbus_test_chat(struct kdbus_test_env *env);
+int kdbus_test_conn_info(struct kdbus_test_env *env);
+int kdbus_test_conn_update(struct kdbus_test_env *env);
+int kdbus_test_daemon(struct kdbus_test_env *env);
+int kdbus_test_custom_endpoint(struct kdbus_test_env *env);
+int kdbus_test_fd_passing(struct kdbus_test_env *env);
+int kdbus_test_free(struct kdbus_test_env *env);
+int kdbus_test_hello(struct kdbus_test_env *env);
+int kdbus_test_match_bloom(struct kdbus_test_env *env);
+int kdbus_test_match_id_add(struct kdbus_test_env *env);
+int kdbus_test_match_id_remove(struct kdbus_test_env *env);
+int kdbus_test_match_replace(struct kdbus_test_env *env);
+int kdbus_test_match_name_add(struct kdbus_test_env *env);
+int kdbus_test_match_name_change(struct kdbus_test_env *env);
+int kdbus_test_match_name_remove(struct kdbus_test_env *env);
+int kdbus_test_message_basic(struct kdbus_test_env *env);
+int kdbus_test_message_prio(struct kdbus_test_env *env);
+int kdbus_test_message_quota(struct kdbus_test_env *env);
+int kdbus_test_memory_access(struct kdbus_test_env *env);
+int kdbus_test_metadata_ns(struct kdbus_test_env *env);
+int kdbus_test_monitor(struct kdbus_test_env *env);
+int kdbus_test_name_basic(struct kdbus_test_env *env);
+int kdbus_test_name_conflict(struct kdbus_test_env *env);
+int kdbus_test_name_queue(struct kdbus_test_env *env);
+int kdbus_test_name_takeover(struct kdbus_test_env *env);
+int kdbus_test_policy(struct kdbus_test_env *env);
+int kdbus_test_policy_ns(struct kdbus_test_env *env);
+int kdbus_test_policy_priv(struct kdbus_test_env *env);
+int kdbus_test_sync_byebye(struct kdbus_test_env *env);
+int kdbus_test_sync_reply(struct kdbus_test_env *env);
+int kdbus_test_timeout(struct kdbus_test_env *env);
+int kdbus_test_writable_pool(struct kdbus_test_env *env);
+
+#endif /* _TEST_KDBUS_H_ */
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-util.c linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-util.c
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-util.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-util.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,1612 @@
+/*
+ * Copyright (C) 2013-2015 Daniel Mack
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <grp.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <linux/memfd.h>
+
+#ifndef __NR_memfd_create
+  #ifdef __x86_64__
+    #define __NR_memfd_create 319
+  #elif defined __arm__
+    #define __NR_memfd_create 385
+  #else
+    #define __NR_memfd_create 356
+  #endif
+#endif
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+#ifndef F_ADD_SEALS
+#define F_LINUX_SPECIFIC_BASE	1024
+#define F_ADD_SEALS     (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS     (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL     0x0001  /* prevent further seals from being set */
+#define F_SEAL_SHRINK   0x0002  /* prevent file from shrinking */
+#define F_SEAL_GROW     0x0004  /* prevent file from growing */
+#define F_SEAL_WRITE    0x0008  /* prevent writes */
+#endif
+
+int kdbus_util_verbose = true;
+
+int kdbus_sysfs_get_parameter_mask(const char *path, uint64_t *mask)
+{
+	int ret;
+	FILE *file;
+	unsigned long long value;
+
+	file = fopen(path, "r");
+	if (!file) {
+		ret = -errno;
+		kdbus_printf("--- error fopen(): %d (%m)\n", ret);
+		return ret;
+	}
+
+	ret = fscanf(file, "%llu", &value);
+	if (ret != 1) {
+		if (ferror(file))
+			ret = -errno;
+		else
+			ret = -EIO;
+
+		kdbus_printf("--- error fscanf(): %d\n", ret);
+		fclose(file);
+		return ret;
+	}
+
+	*mask = (uint64_t)value;
+
+	fclose(file);
+
+	return 0;
+}
+
+int kdbus_sysfs_set_parameter_mask(const char *path, uint64_t mask)
+{
+	int ret;
+	FILE *file;
+
+	file = fopen(path, "w");
+	if (!file) {
+		ret = -errno;
+		kdbus_printf("--- error open(): %d (%m)\n", ret);
+		return ret;
+	}
+
+	ret = fprintf(file, "%llu", (unsigned long long)mask);
+	if (ret <= 0) {
+		ret = -EIO;
+		kdbus_printf("--- error fprintf(): %d\n", ret);
+	}
+
+	fclose(file);
+
+	return ret > 0 ? 0 : ret;
+}
+
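+/*
+ * Issue KDBUS_CMD_BUS_MAKE on the control fd. The command carries a
+ * bloom parameter item, an ATTACH_FLAGS_SEND item with the requested
+ * owner metadata, and a MAKE_NAME item of the form "<uid>-<name>".
+ * On success, the resulting bus name is returned through *path.
+ */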
+int kdbus_create_bus(int control_fd, const char *name,
+		     uint64_t owner_meta, char **path)
+{
+	struct {
+		struct kdbus_cmd cmd;
+
+		/* bloom parameter item */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_bloom_parameter bloom;
+		} bp;
+
+		/* owner metadata items */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			uint64_t flags;
+		} attach;
+
+		/* name item */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			char str[64];
+		} name;
+	} bus_make;
+	int ret;
+
+	memset(&bus_make, 0, sizeof(bus_make));
+	bus_make.bp.size = sizeof(bus_make.bp);
+	bus_make.bp.type = KDBUS_ITEM_BLOOM_PARAMETER;
+	bus_make.bp.bloom.size = 64;
+	bus_make.bp.bloom.n_hash = 1;
+
+	snprintf(bus_make.name.str, sizeof(bus_make.name.str),
+		 "%u-%s", getuid(), name);
+
+	bus_make.attach.type = KDBUS_ITEM_ATTACH_FLAGS_SEND;
+	bus_make.attach.size = sizeof(bus_make.attach);
+	bus_make.attach.flags = owner_meta;
+
+	bus_make.name.type = KDBUS_ITEM_MAKE_NAME;
+	bus_make.name.size = KDBUS_ITEM_HEADER_SIZE +
+			     strlen(bus_make.name.str) + 1;
+
+	bus_make.cmd.flags = KDBUS_MAKE_ACCESS_WORLD;
+	bus_make.cmd.size = sizeof(bus_make.cmd) +
+			     bus_make.bp.size +
+			     bus_make.attach.size +
+			     bus_make.name.size;
+
+	kdbus_printf("Creating bus with name >%s< on control fd %d ...\n",
+		     name, control_fd);
+
+	ret = kdbus_cmd_bus_make(control_fd, &bus_make.cmd);
+	if (ret < 0) {
+		kdbus_printf("--- error when making bus: %d (%m)\n", ret);
+		return ret;
+	}
+
+	if (ret == 0 && path)
+		*path = strdup(bus_make.name.str);
+
+	return ret;
+}
+
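+/*
+ * Open the bus endpoint, issue KDBUS_CMD_HELLO with a connection
+ * description item (plus any caller-supplied items), mmap the receive
+ * pool read-only and return the new connection handle.
+ */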
+struct kdbus_conn *
+kdbus_hello(const char *path, uint64_t flags,
+	    const struct kdbus_item *item, size_t item_size)
+{
+	struct kdbus_cmd_free cmd_free = {};
+	int fd, ret;
+	struct {
+		struct kdbus_cmd_hello hello;
+
+		struct {
+			uint64_t size;
+			uint64_t type;
+			char str[16];
+		} conn_name;
+
+		uint8_t extra_items[item_size];
+	} h;
+	struct kdbus_conn *conn;
+
+	memset(&h, 0, sizeof(h));
+
+	if (item_size > 0)
+		memcpy(h.extra_items, item, item_size);
+
+	kdbus_printf("-- opening bus connection %s\n", path);
+	fd = open(path, O_RDWR|O_CLOEXEC);
+	if (fd < 0) {
+		kdbus_printf("--- error %d (%m)\n", fd);
+		return NULL;
+	}
+
+	h.hello.flags = flags | KDBUS_HELLO_ACCEPT_FD;
+	h.hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	h.hello.attach_flags_recv = _KDBUS_ATTACH_ALL;
+	h.conn_name.type = KDBUS_ITEM_CONN_DESCRIPTION;
+	strcpy(h.conn_name.str, "this-is-my-name");
+	h.conn_name.size = KDBUS_ITEM_HEADER_SIZE + strlen(h.conn_name.str) + 1;
+
+	h.hello.size = sizeof(h);
+	h.hello.pool_size = POOL_SIZE;
+
+	ret = kdbus_cmd_hello(fd, (struct kdbus_cmd_hello *) &h.hello);
+	if (ret < 0) {
+		kdbus_printf("--- error when saying hello: %d (%m)\n", ret);
+		return NULL;
+	}
+	kdbus_printf("-- Our peer ID for %s: %llu -- bus uuid: '%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x'\n",
+		     path, (unsigned long long)h.hello.id,
+		     h.hello.id128[0],  h.hello.id128[1],  h.hello.id128[2],
+		     h.hello.id128[3],  h.hello.id128[4],  h.hello.id128[5],
+		     h.hello.id128[6],  h.hello.id128[7],  h.hello.id128[8],
+		     h.hello.id128[9],  h.hello.id128[10], h.hello.id128[11],
+		     h.hello.id128[12], h.hello.id128[13], h.hello.id128[14],
+		     h.hello.id128[15]);
+
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = h.hello.offset;
+	kdbus_cmd_free(fd, &cmd_free);
+
+	conn = malloc(sizeof(*conn));
+	if (!conn) {
+		kdbus_printf("unable to malloc()!?\n");
+		return NULL;
+	}
+
+	conn->buf = mmap(NULL, POOL_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	if (conn->buf == MAP_FAILED) {
+		free(conn);
+		close(fd);
+		kdbus_printf("--- error mmap (%m)\n");
+		return NULL;
+	}
+
+	conn->fd = fd;
+	conn->id = h.hello.id;
+	return conn;
+}
+
+struct kdbus_conn *
+kdbus_hello_registrar(const char *path, const char *name,
+		      const struct kdbus_policy_access *access,
+		      size_t num_access, uint64_t flags)
+{
+	struct kdbus_item *item, *items;
+	size_t i, size;
+
+	size = KDBUS_ITEM_SIZE(strlen(name) + 1) +
+		num_access * KDBUS_ITEM_SIZE(sizeof(*access));
+
+	items = alloca(size);
+
+	item = items;
+	item->size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+	item->type = KDBUS_ITEM_NAME;
+	strcpy(item->str, name);
+	item = KDBUS_ITEM_NEXT(item);
+
+	for (i = 0; i < num_access; i++) {
+		item->size = KDBUS_ITEM_HEADER_SIZE +
+			     sizeof(struct kdbus_policy_access);
+		item->type = KDBUS_ITEM_POLICY_ACCESS;
+
+		item->policy_access.type = access[i].type;
+		item->policy_access.access = access[i].access;
+		item->policy_access.id = access[i].id;
+
+		item = KDBUS_ITEM_NEXT(item);
+	}
+
+	return kdbus_hello(path, flags, items, size);
+}
+
+struct kdbus_conn *kdbus_hello_activator(const char *path, const char *name,
+				   const struct kdbus_policy_access *access,
+				   size_t num_access)
+{
+	return kdbus_hello_registrar(path, name, access, num_access,
+				     KDBUS_HELLO_ACTIVATOR);
+}
+
+bool kdbus_item_in_message(struct kdbus_msg *msg, uint64_t type)
+{
+	const struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, msg, items)
+		if (item->type == type)
+			return true;
+
+	return false;
+}
+
+int kdbus_bus_creator_info(struct kdbus_conn *conn,
+			   uint64_t flags,
+			   uint64_t *offset)
+{
+	struct kdbus_cmd_info *cmd;
+	size_t size = sizeof(*cmd);
+	int ret;
+
+	cmd = alloca(size);
+	memset(cmd, 0, size);
+	cmd->size = size;
+	cmd->attach_flags = flags;
+
+	ret = kdbus_cmd_bus_creator_info(conn->fd, cmd);
+	if (ret < 0) {
+		kdbus_printf("--- error when requesting info: %d (%m)\n", ret);
+		return ret;
+	}
+
+	if (offset)
+		*offset = cmd->offset;
+	else
+		kdbus_free(conn, cmd->offset);
+
+	return 0;
+}
+
+int kdbus_conn_info(struct kdbus_conn *conn, uint64_t id,
+		    const char *name, uint64_t flags,
+		    uint64_t *offset)
+{
+	struct kdbus_cmd_info *cmd;
+	size_t size = sizeof(*cmd);
+	struct kdbus_info *info;
+	int ret;
+
+	if (name)
+		size += KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+
+	cmd = alloca(size);
+	memset(cmd, 0, size);
+	cmd->size = size;
+	cmd->attach_flags = flags;
+
+	if (name) {
+		cmd->items[0].size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+		cmd->items[0].type = KDBUS_ITEM_NAME;
+		strcpy(cmd->items[0].str, name);
+	} else {
+		cmd->id = id;
+	}
+
+	ret = kdbus_cmd_conn_info(conn->fd, cmd);
+	if (ret < 0) {
+		kdbus_printf("--- error when requesting info: %d (%m)\n", ret);
+		return ret;
+	}
+
+	info = (struct kdbus_info *) (conn->buf + cmd->offset);
+	if (info->size != cmd->info_size) {
+		kdbus_printf("%s(): size mismatch: %d != %d\n", __func__,
+				(int) info->size, (int) cmd->info_size);
+		return -EIO;
+	}
+
+	if (offset)
+		*offset = cmd->offset;
+	else
+		kdbus_free(conn, cmd->offset);
+
+	return 0;
+}
+
+void kdbus_conn_free(struct kdbus_conn *conn)
+{
+	if (!conn)
+		return;
+
+	if (conn->buf)
+		munmap(conn->buf, POOL_SIZE);
+
+	if (conn->fd >= 0)
+		close(conn->fd);
+
+	free(conn);
+}
+
+int sys_memfd_create(const char *name, __u64 size)
+{
+	int ret, fd;
+
+	fd = syscall(__NR_memfd_create, name, MFD_ALLOW_SEALING);
+	if (fd < 0)
+		return fd;
+
+	ret = ftruncate(fd, size);
+	if (ret < 0) {
+		close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+int sys_memfd_seal_set(int fd)
+{
+	return fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK |
+			 F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+}
+
+off_t sys_memfd_get_size(int fd, off_t *size)
+{
+	struct stat stat;
+	int ret;
+
+	ret = fstat(fd, &stat);
+	if (ret < 0) {
+		kdbus_printf("stat() failed: %m\n");
+		return ret;
+	}
+
+	*size = stat.st_size;
+	return 0;
+}
+
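+/*
+ * Build and send a test message consisting of two payload vectors plus
+ * an alignment padding vector, and either a sealed memfd payload
+ * (unicast) or a bloom filter item (broadcast). An optional cancel fd
+ * is attached to the send command, and with KDBUS_SEND_SYNC_REPLY the
+ * synchronous reply is dumped and released before returning.
+ */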
+static int __kdbus_msg_send(const struct kdbus_conn *conn,
+			    const char *name,
+			    uint64_t cookie,
+			    uint64_t flags,
+			    uint64_t timeout,
+			    int64_t priority,
+			    uint64_t dst_id,
+			    uint64_t cmd_flags,
+			    int cancel_fd)
+{
+	struct kdbus_cmd_send *cmd = NULL;
+	struct kdbus_msg *msg = NULL;
+	const char ref1[1024 * 128 + 3] = "0123456789_0";
+	const char ref2[] = "0123456789_1";
+	struct kdbus_item *item;
+	struct timespec now;
+	uint64_t size;
+	int memfd = -1;
+	int ret;
+
+	size = sizeof(*msg) + 3 * KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST)
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+	else {
+		memfd = sys_memfd_create("my-name-is-nice", 1024 * 1024);
+		if (memfd < 0) {
+			kdbus_printf("failed to create memfd: %m\n");
+			return memfd;
+		}
+
+		if (write(memfd, "kdbus memfd 1234567", 19) != 19) {
+			ret = -errno;
+			kdbus_printf("writing to memfd failed: %m\n");
+			goto out;
+		}
+
+		ret = sys_memfd_seal_set(memfd);
+		if (ret < 0) {
+			ret = -errno;
+			kdbus_printf("memfd sealing failed: %m\n");
+			goto out;
+		}
+
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_memfd));
+	}
+
+	if (name)
+		size += KDBUS_ITEM_SIZE(strlen(name) + 1);
+
+	msg = malloc(size);
+	if (!msg) {
+		ret = -errno;
+		kdbus_printf("unable to malloc()!?\n");
+		goto out;
+	}
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST)
+		flags |= KDBUS_MSG_SIGNAL;
+
+	memset(msg, 0, size);
+	msg->flags = flags;
+	msg->priority = priority;
+	msg->size = size;
+	msg->src_id = conn->id;
+	msg->dst_id = name ? 0 : dst_id;
+	msg->cookie = cookie;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	if (timeout) {
+		ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
+		if (ret < 0)
+			goto out;
+
+		msg->timeout_ns = now.tv_sec * 1000000000ULL +
+				  now.tv_nsec + timeout;
+	}
+
+	item = msg->items;
+
+	if (name) {
+		item->type = KDBUS_ITEM_DST_NAME;
+		item->size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+		strcpy(item->str, name);
+		item = KDBUS_ITEM_NEXT(item);
+	}
+
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)&ref1;
+	item->vec.size = sizeof(ref1);
+	item = KDBUS_ITEM_NEXT(item);
+
+	/* data padding for ref1 */
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)NULL;
+	item->vec.size =  KDBUS_ALIGN8(sizeof(ref1)) - sizeof(ref1);
+	item = KDBUS_ITEM_NEXT(item);
+
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)&ref2;
+	item->vec.size = sizeof(ref2);
+	item = KDBUS_ITEM_NEXT(item);
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST) {
+		item->type = KDBUS_ITEM_BLOOM_FILTER;
+		item->size = KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+		item->bloom_filter.generation = 0;
+	} else {
+		item->type = KDBUS_ITEM_PAYLOAD_MEMFD;
+		item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_memfd);
+		item->memfd.size = 16;
+		item->memfd.fd = memfd;
+	}
+	item = KDBUS_ITEM_NEXT(item);
+
+	size = sizeof(*cmd);
+	if (cancel_fd != -1)
+		size += KDBUS_ITEM_SIZE(sizeof(cancel_fd));
+
+	cmd = malloc(size);
+	if (!cmd) {
+		ret = -errno;
+		kdbus_printf("unable to malloc()!?\n");
+		goto out;
+	}
+
+	cmd->size = size;
+	cmd->flags = cmd_flags;
+	cmd->msg_address = (uintptr_t)msg;
+
+	item = cmd->items;
+
+	if (cancel_fd != -1) {
+		item->type = KDBUS_ITEM_CANCEL_FD;
+		item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(cancel_fd);
+		item->fds[0] = cancel_fd;
+		item = KDBUS_ITEM_NEXT(item);
+	}
+
+	ret = kdbus_cmd_send(conn->fd, cmd);
+	if (ret < 0) {
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+		goto out;
+	}
+
+	if (cmd_flags & KDBUS_SEND_SYNC_REPLY) {
+		struct kdbus_msg *reply;
+
+		kdbus_printf("SYNC REPLY @offset %llu:\n", cmd->reply.offset);
+		reply = (struct kdbus_msg *)(conn->buf + cmd->reply.offset);
+		kdbus_msg_dump(conn, reply);
+
+		kdbus_msg_free(reply);
+
+		ret = kdbus_free(conn, cmd->reply.offset);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	free(msg);
+	free(cmd);
+
+	if (memfd >= 0)
+		close(memfd);
+
+	return ret < 0 ? ret : 0;
+}
+
+int kdbus_msg_send(const struct kdbus_conn *conn, const char *name,
+		   uint64_t cookie, uint64_t flags, uint64_t timeout,
+		   int64_t priority, uint64_t dst_id)
+{
+	return __kdbus_msg_send(conn, name, cookie, flags, timeout, priority,
+				dst_id, 0, -1);
+}
+
+int kdbus_msg_send_sync(const struct kdbus_conn *conn, const char *name,
+			uint64_t cookie, uint64_t flags, uint64_t timeout,
+			int64_t priority, uint64_t dst_id, int cancel_fd)
+{
+	return __kdbus_msg_send(conn, name, cookie, flags, timeout, priority,
+				dst_id, KDBUS_SEND_SYNC_REPLY, cancel_fd);
+}
+
+int kdbus_msg_send_reply(const struct kdbus_conn *conn,
+			 uint64_t reply_cookie,
+			 uint64_t dst_id)
+{
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_msg *msg;
+	const char ref1[1024 * 128 + 3] = "0123456789_0";
+	struct kdbus_item *item;
+	uint64_t size;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	msg = malloc(size);
+	if (!msg) {
+		kdbus_printf("unable to malloc()!?\n");
+		return -ENOMEM;
+	}
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = conn->id;
+	msg->dst_id = dst_id;
+	msg->cookie_reply = reply_cookie;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	item = msg->items;
+
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)&ref1;
+	item->vec.size = sizeof(ref1);
+	item = KDBUS_ITEM_NEXT(item);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	if (ret < 0)
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+
+	free(msg);
+
+	return ret;
+}
+
+static char *msg_id(uint64_t id, char *buf)
+{
+	if (id == 0)
+		return "KERNEL";
+	if (id == ~0ULL)
+		return "BROADCAST";
+	sprintf(buf, "%llu", (unsigned long long)id);
+	return buf;
+}
+
+int kdbus_msg_dump(const struct kdbus_conn *conn, const struct kdbus_msg *msg)
+{
+	const struct kdbus_item *item = msg->items;
+	char buf_src[32];
+	char buf_dst[32];
+	uint64_t timeout = 0;
+	uint64_t cookie_reply = 0;
+	int ret = 0;
+
+	if (msg->flags & KDBUS_MSG_EXPECT_REPLY)
+		timeout = msg->timeout_ns;
+	else
+		cookie_reply = msg->cookie_reply;
+
+	kdbus_printf("MESSAGE: %s (%llu bytes) flags=0x%08llx, %s → %s, "
+		     "cookie=%llu, timeout=%llu cookie_reply=%llu priority=%lli\n",
+		enum_PAYLOAD(msg->payload_type), (unsigned long long)msg->size,
+		(unsigned long long)msg->flags,
+		msg_id(msg->src_id, buf_src), msg_id(msg->dst_id, buf_dst),
+		(unsigned long long)msg->cookie, (unsigned long long)timeout,
+		(unsigned long long)cookie_reply, (long long)msg->priority);
+
+	KDBUS_ITEM_FOREACH(item, msg, items) {
+		if (item->size < KDBUS_ITEM_HEADER_SIZE) {
+			kdbus_printf("  +%s (%llu bytes) invalid data record\n",
+				     enum_MSG(item->type), item->size);
+			ret = -EINVAL;
+			break;
+		}
+
+		switch (item->type) {
+		case KDBUS_ITEM_PAYLOAD_OFF: {
+			char *s;
+
+			if (item->vec.offset == ~0ULL)
+				s = "[\\0-bytes]";
+			else
+				s = (char *)msg + item->vec.offset;
+
+			kdbus_printf("  +%s (%llu bytes) off=%llu size=%llu '%s'\n",
+			       enum_MSG(item->type), item->size,
+			       (unsigned long long)item->vec.offset,
+			       (unsigned long long)item->vec.size, s);
+			break;
+		}
+
+		case KDBUS_ITEM_FDS: {
+			int i, n = (item->size - KDBUS_ITEM_HEADER_SIZE) /
+					sizeof(int);
+
+			kdbus_printf("  +%s (%llu bytes, %d fds)\n",
+			       enum_MSG(item->type), item->size, n);
+
+			for (i = 0; i < n; i++)
+				kdbus_printf("    fd[%d] = %d\n",
+					     i, item->fds[i]);
+
+			break;
+		}
+
+		case KDBUS_ITEM_PAYLOAD_MEMFD: {
+			char *buf;
+			off_t size;
+
+			buf = mmap(NULL, item->memfd.size, PROT_READ,
+				   MAP_PRIVATE, item->memfd.fd, 0);
+			if (buf == MAP_FAILED) {
+				kdbus_printf("mmap() fd=%i size=%llu failed: %m\n",
+					     item->memfd.fd, item->memfd.size);
+				break;
+			}
+
+			if (sys_memfd_get_size(item->memfd.fd, &size) < 0) {
+				kdbus_printf("KDBUS_CMD_MEMFD_SIZE_GET failed: %m\n");
+				break;
+			}
+
+			kdbus_printf("  +%s (%llu bytes) fd=%i size=%llu filesize=%llu '%s'\n",
+			       enum_MSG(item->type), item->size, item->memfd.fd,
+			       (unsigned long long)item->memfd.size,
+			       (unsigned long long)size, buf);
+			munmap(buf, item->memfd.size);
+			break;
+		}
+
+		case KDBUS_ITEM_CREDS:
+			kdbus_printf("  +%s (%llu bytes) uid=%lld, euid=%lld, suid=%lld, fsuid=%lld, "
+							"gid=%lld, egid=%lld, sgid=%lld, fsgid=%lld\n",
+				enum_MSG(item->type), item->size,
+				item->creds.uid, item->creds.euid,
+				item->creds.suid, item->creds.fsuid,
+				item->creds.gid, item->creds.egid,
+				item->creds.sgid, item->creds.fsgid);
+			break;
+
+		case KDBUS_ITEM_PIDS:
+			kdbus_printf("  +%s (%llu bytes) pid=%lld, tid=%lld, ppid=%lld\n",
+				enum_MSG(item->type), item->size,
+				item->pids.pid, item->pids.tid,
+				item->pids.ppid);
+			break;
+
+		case KDBUS_ITEM_AUXGROUPS: {
+			int i, n;
+
+			kdbus_printf("  +%s (%llu bytes)\n",
+				     enum_MSG(item->type), item->size);
+			n = (item->size - KDBUS_ITEM_HEADER_SIZE) /
+				sizeof(uint64_t);
+
+			for (i = 0; i < n; i++)
+				kdbus_printf("    gid[%d] = %lld\n",
+					     i, item->data64[i]);
+			break;
+		}
+
+		case KDBUS_ITEM_NAME:
+		case KDBUS_ITEM_PID_COMM:
+		case KDBUS_ITEM_TID_COMM:
+		case KDBUS_ITEM_EXE:
+		case KDBUS_ITEM_CGROUP:
+		case KDBUS_ITEM_SECLABEL:
+		case KDBUS_ITEM_DST_NAME:
+		case KDBUS_ITEM_CONN_DESCRIPTION:
+			kdbus_printf("  +%s (%llu bytes) '%s' (%zu)\n",
+				     enum_MSG(item->type), item->size,
+				     item->str, strlen(item->str));
+			break;
+
+		case KDBUS_ITEM_OWNED_NAME: {
+			kdbus_printf("  +%s (%llu bytes) '%s' (%zu) flags=0x%08llx\n",
+				     enum_MSG(item->type), item->size,
+				     item->name.name, strlen(item->name.name),
+				     item->name.flags);
+			break;
+		}
+
+		case KDBUS_ITEM_CMDLINE: {
+			size_t size = item->size - KDBUS_ITEM_HEADER_SIZE;
+			const char *str = item->str;
+			int count = 0;
+
+			kdbus_printf("  +%s (%llu bytes) ",
+				     enum_MSG(item->type), item->size);
+			while (size) {
+				kdbus_printf("'%s' ", str);
+				size -= strlen(str) + 1;
+				str += strlen(str) + 1;
+				count++;
+			}
+
+			kdbus_printf("(%d string%s)\n",
+				     count, (count == 1) ? "" : "s");
+			break;
+		}
+
+		case KDBUS_ITEM_AUDIT:
+			kdbus_printf("  +%s (%llu bytes) loginuid=%u sessionid=%u\n",
+			       enum_MSG(item->type), item->size,
+			       item->audit.loginuid, item->audit.sessionid);
+			break;
+
+		case KDBUS_ITEM_CAPS: {
+			const uint32_t *cap;
+			int n, i;
+
+			kdbus_printf("  +%s (%llu bytes) len=%llu bytes, last_cap %d\n",
+				     enum_MSG(item->type), item->size,
+				     (unsigned long long)item->size -
+					KDBUS_ITEM_HEADER_SIZE,
+				     (int) item->caps.last_cap);
+
+			cap = item->caps.caps;
+			n = (item->size - offsetof(struct kdbus_item, caps.caps))
+				/ 4 / sizeof(uint32_t);
+
+			kdbus_printf("    CapInh=");
+			for (i = 0; i < n; i++)
+				kdbus_printf("%08x", cap[(0 * n) + (n - i - 1)]);
+
+			kdbus_printf(" CapPrm=");
+			for (i = 0; i < n; i++)
+				kdbus_printf("%08x", cap[(1 * n) + (n - i - 1)]);
+
+			kdbus_printf(" CapEff=");
+			for (i = 0; i < n; i++)
+				kdbus_printf("%08x", cap[(2 * n) + (n - i - 1)]);
+
+			kdbus_printf(" CapBnd=");
+			for (i = 0; i < n; i++)
+				kdbus_printf("%08x", cap[(3 * n) + (n - i - 1)]);
+			kdbus_printf("\n");
+			break;
+		}
+
+		case KDBUS_ITEM_TIMESTAMP:
+			kdbus_printf("  +%s (%llu bytes) seq=%llu realtime=%lluns monotonic=%lluns\n",
+			       enum_MSG(item->type), item->size,
+			       (unsigned long long)item->timestamp.seqnum,
+			       (unsigned long long)item->timestamp.realtime_ns,
+			       (unsigned long long)item->timestamp.monotonic_ns);
+			break;
+
+		case KDBUS_ITEM_REPLY_TIMEOUT:
+			kdbus_printf("  +%s (%llu bytes) cookie=%llu\n",
+			       enum_MSG(item->type), item->size,
+			       msg->cookie_reply);
+			break;
+
+		case KDBUS_ITEM_NAME_ADD:
+		case KDBUS_ITEM_NAME_REMOVE:
+		case KDBUS_ITEM_NAME_CHANGE:
+			kdbus_printf("  +%s (%llu bytes) '%s', old id=%lld, now id=%lld, old_flags=0x%llx new_flags=0x%llx\n",
+				enum_MSG(item->type),
+				(unsigned long long) item->size,
+				item->name_change.name,
+				item->name_change.old_id.id,
+				item->name_change.new_id.id,
+				item->name_change.old_id.flags,
+				item->name_change.new_id.flags);
+			break;
+
+		case KDBUS_ITEM_ID_ADD:
+		case KDBUS_ITEM_ID_REMOVE:
+			kdbus_printf("  +%s (%llu bytes) id=%llu flags=%llu\n",
+			       enum_MSG(item->type),
+			       (unsigned long long) item->size,
+			       (unsigned long long) item->id_change.id,
+			       (unsigned long long) item->id_change.flags);
+			break;
+
+		default:
+			kdbus_printf("  +%s (%llu bytes)\n",
+				     enum_MSG(item->type), item->size);
+			break;
+		}
+	}
+
+	if ((char *)item - ((char *)msg + msg->size) >= 8) {
+		kdbus_printf("invalid padding at end of message\n");
+		ret = -EINVAL;
+	}
+
+	kdbus_printf("\n");
+
+	return ret;
+}
+
+void kdbus_msg_free(struct kdbus_msg *msg)
+{
+	const struct kdbus_item *item;
+	int nfds, i;
+
+	if (!msg)
+		return;
+
+	KDBUS_ITEM_FOREACH(item, msg, items) {
+		switch (item->type) {
+		/* close all memfds */
+		case KDBUS_ITEM_PAYLOAD_MEMFD:
+			close(item->memfd.fd);
+			break;
+		case KDBUS_ITEM_FDS:
+			nfds = (item->size - KDBUS_ITEM_HEADER_SIZE) /
+				sizeof(int);
+
+			for (i = 0; i < nfds; i++)
+				close(item->fds[i]);
+
+			break;
+		}
+	}
+}
+
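+/*
+ * Fetch the next message via KDBUS_CMD_RECV and dump it. Ownership of
+ * the message (and its pool offset) is handed to the caller if msg_out
+ * is set; otherwise the message and its pool slice are freed here.
+ */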
+int kdbus_msg_recv(struct kdbus_conn *conn,
+		   struct kdbus_msg **msg_out,
+		   uint64_t *offset)
+{
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+	struct kdbus_msg *msg;
+	int ret;
+
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	if (ret < 0)
+		return ret;
+
+	msg = (struct kdbus_msg *)(conn->buf + recv.msg.offset);
+	ret = kdbus_msg_dump(conn, msg);
+	if (ret < 0) {
+		kdbus_msg_free(msg);
+		return ret;
+	}
+
+	if (msg_out) {
+		*msg_out = msg;
+
+		if (offset)
+			*offset = recv.msg.offset;
+	} else {
+		kdbus_msg_free(msg);
+
+		ret = kdbus_free(conn, recv.msg.offset);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns: 0 on success, negative errno on failure.
+ *
+ * Errors such as -ETIMEDOUT, -ECONNRESET and -EAGAIN must be passed
+ * through to the caller, as must the result of kdbus_msg_recv().
+ */
+int kdbus_msg_recv_poll(struct kdbus_conn *conn,
+			int timeout_ms,
+			struct kdbus_msg **msg_out,
+			uint64_t *offset)
+{
+	int ret;
+
+	do {
+		struct timeval before, after, diff;
+		struct pollfd fd;
+
+		fd.fd = conn->fd;
+		fd.events = POLLIN | POLLPRI | POLLHUP;
+		fd.revents = 0;
+
+		gettimeofday(&before, NULL);
+		ret = poll(&fd, 1, timeout_ms);
+		gettimeofday(&after, NULL);
+
+		if (ret == 0) {
+			ret = -ETIMEDOUT;
+			break;
+		}
+
+		if (ret > 0) {
+			if (fd.revents & POLLIN)
+				ret = kdbus_msg_recv(conn, msg_out, offset);
+
+			if (fd.revents & (POLLHUP | POLLERR))
+				ret = -ECONNRESET;
+		}
+
+		if (ret == 0 || ret != -EAGAIN)
+			break;
+
+		timersub(&after, &before, &diff);
+		timeout_ms -= diff.tv_sec * 1000UL +
+			      diff.tv_usec / 1000UL;
+	} while (timeout_ms > 0);
+
+	return ret;
+}
+
+int kdbus_free(const struct kdbus_conn *conn, uint64_t offset)
+{
+	struct kdbus_cmd_free cmd_free = {};
+	int ret;
+
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = offset;
+	cmd_free.flags = 0;
+
+	ret = kdbus_cmd_free(conn->fd, &cmd_free);
+	if (ret < 0) {
+		kdbus_printf("KDBUS_CMD_FREE failed: %d (%m)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int kdbus_name_acquire(struct kdbus_conn *conn,
+		       const char *name, uint64_t *flags)
+{
+	struct kdbus_cmd *cmd_name;
+	size_t name_len = strlen(name) + 1;
+	uint64_t size = sizeof(*cmd_name) + KDBUS_ITEM_SIZE(name_len);
+	struct kdbus_item *item;
+	int ret;
+
+	cmd_name = alloca(size);
+
+	memset(cmd_name, 0, size);
+
+	item = cmd_name->items;
+	item->size = KDBUS_ITEM_HEADER_SIZE + name_len;
+	item->type = KDBUS_ITEM_NAME;
+	strcpy(item->str, name);
+
+	cmd_name->size = size;
+	if (flags)
+		cmd_name->flags = *flags;
+
+	ret = kdbus_cmd_name_acquire(conn->fd, cmd_name);
+	if (ret < 0) {
+		kdbus_printf("error aquiring name: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	kdbus_printf("%s(): flags after call: 0x%llx\n", __func__,
+		     cmd_name->return_flags);
+
+	if (flags)
+		*flags = cmd_name->return_flags;
+
+	return 0;
+}
+
+int kdbus_name_release(struct kdbus_conn *conn, const char *name)
+{
+	struct kdbus_cmd *cmd_name;
+	size_t name_len = strlen(name) + 1;
+	uint64_t size = sizeof(*cmd_name) + KDBUS_ITEM_SIZE(name_len);
+	struct kdbus_item *item;
+	int ret;
+
+	cmd_name = alloca(size);
+
+	memset(cmd_name, 0, size);
+
+	item = cmd_name->items;
+	item->size = KDBUS_ITEM_HEADER_SIZE + name_len;
+	item->type = KDBUS_ITEM_NAME;
+	strcpy(item->str, name);
+
+	cmd_name->size = size;
+
+	kdbus_printf("conn %lld giving up name '%s'\n",
+		     (unsigned long long) conn->id, name);
+
+	ret = kdbus_cmd_name_release(conn->fd, cmd_name);
+	if (ret < 0) {
+		kdbus_printf("error releasing name: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	return 0;
+}
+
+int kdbus_list(struct kdbus_conn *conn, uint64_t flags)
+{
+	struct kdbus_cmd_list cmd_list = {};
+	struct kdbus_info *list, *name;
+	int ret;
+
+	cmd_list.size = sizeof(cmd_list);
+	cmd_list.flags = flags;
+
+	ret = kdbus_cmd_list(conn->fd, &cmd_list);
+	if (ret < 0) {
+		kdbus_printf("error listing names: %d (%m)\n", ret);
+		return ret;
+	}
+
+	kdbus_printf("REGISTRY:\n");
+	list = (struct kdbus_info *)(conn->buf + cmd_list.offset);
+
+	KDBUS_FOREACH(name, list, cmd_list.list_size) {
+		uint64_t flags = 0;
+		struct kdbus_item *item;
+		const char *n = "MISSING-NAME";
+
+		if (name->size == sizeof(struct kdbus_cmd))
+			continue;
+
+		KDBUS_ITEM_FOREACH(item, name, items)
+			if (item->type == KDBUS_ITEM_OWNED_NAME) {
+				n = item->name.name;
+				flags = item->name.flags;
+
+				kdbus_printf("%8llu flags=0x%08llx conn=0x%08llx '%s'\n",
+					     name->id,
+					     (unsigned long long) flags,
+					     name->flags, n);
+			}
+	}
+	kdbus_printf("\n");
+
+	ret = kdbus_free(conn, cmd_list.offset);
+
+	return ret;
+}
+
+int kdbus_conn_update_attach_flags(struct kdbus_conn *conn,
+				   uint64_t attach_flags_send,
+				   uint64_t attach_flags_recv)
+{
+	int ret;
+	size_t size;
+	struct kdbus_cmd *update;
+	struct kdbus_item *item;
+
+	size = sizeof(struct kdbus_cmd);
+	size += KDBUS_ITEM_SIZE(sizeof(uint64_t)) * 2;
+
+	update = malloc(size);
+	if (!update) {
+		kdbus_printf("error malloc: %m\n");
+		return -ENOMEM;
+	}
+
+	memset(update, 0, size);
+	update->size = size;
+
+	item = update->items;
+
+	item->type = KDBUS_ITEM_ATTACH_FLAGS_SEND;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(uint64_t);
+	item->data64[0] = attach_flags_send;
+	item = KDBUS_ITEM_NEXT(item);
+
+	item->type = KDBUS_ITEM_ATTACH_FLAGS_RECV;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(uint64_t);
+	item->data64[0] = attach_flags_recv;
+	item = KDBUS_ITEM_NEXT(item);
+
+	ret = kdbus_cmd_update(conn->fd, update);
+	if (ret < 0)
+		kdbus_printf("error conn update: %d (%m)\n", ret);
+
+	free(update);
+
+	return ret;
+}
+
+int kdbus_conn_update_policy(struct kdbus_conn *conn, const char *name,
+			     const struct kdbus_policy_access *access,
+			     size_t num_access)
+{
+	struct kdbus_cmd *update;
+	struct kdbus_item *item;
+	size_t i, size;
+	int ret;
+
+	size = sizeof(struct kdbus_cmd);
+	size += KDBUS_ITEM_SIZE(strlen(name) + 1);
+	size += num_access * KDBUS_ITEM_SIZE(sizeof(struct kdbus_policy_access));
+
+	update = malloc(size);
+	if (!update) {
+		kdbus_printf("error malloc: %m\n");
+		return -ENOMEM;
+	}
+
+	memset(update, 0, size);
+	update->size = size;
+
+	item = update->items;
+
+	item->type = KDBUS_ITEM_NAME;
+	item->size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+	strcpy(item->str, name);
+	item = KDBUS_ITEM_NEXT(item);
+
+	for (i = 0; i < num_access; i++) {
+		item->size = KDBUS_ITEM_HEADER_SIZE +
+			     sizeof(struct kdbus_policy_access);
+		item->type = KDBUS_ITEM_POLICY_ACCESS;
+
+		item->policy_access.type = access[i].type;
+		item->policy_access.access = access[i].access;
+		item->policy_access.id = access[i].id;
+
+		item = KDBUS_ITEM_NEXT(item);
+	}
+
+	ret = kdbus_cmd_update(conn->fd, update);
+	if (ret < 0)
+		kdbus_printf("error conn update: %d (%m)\n", ret);
+
+	free(update);
+
+	return ret;
+}
+
+int kdbus_add_match_id(struct kdbus_conn *conn, uint64_t cookie,
+		       uint64_t type, uint64_t id)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_id_change chg;
+		} item;
+	} buf;
+	int ret;
+
+	memset(&buf, 0, sizeof(buf));
+
+	buf.cmd.size = sizeof(buf);
+	buf.cmd.cookie = cookie;
+	buf.item.size = sizeof(buf.item);
+	buf.item.type = type;
+	buf.item.chg.id = id;
+
+	ret = kdbus_cmd_match_add(conn->fd, &buf.cmd);
+	if (ret < 0)
+		kdbus_printf("--- error adding conn match: %d (%m)\n", ret);
+
+	return ret;
+}
+
+int kdbus_add_match_empty(struct kdbus_conn *conn)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct kdbus_item item;
+	} buf;
+	int ret;
+
+	memset(&buf, 0, sizeof(buf));
+
+	buf.item.size = sizeof(uint64_t) * 3;
+	buf.item.type = KDBUS_ITEM_ID;
+	buf.item.id = KDBUS_MATCH_ID_ANY;
+
+	buf.cmd.size = sizeof(buf.cmd) + buf.item.size;
+
+	ret = kdbus_cmd_match_add(conn->fd, &buf.cmd);
+	if (ret < 0)
+		kdbus_printf("--- error adding conn match: %d (%m)\n", ret);
+
+	return ret;
+}
+
+static int all_ids_are_mapped(const char *path)
+{
+	int ret;
+	FILE *file;
+	uint32_t inside_id, length;
+
+	file = fopen(path, "r");
+	if (!file) {
+		ret = -errno;
+		kdbus_printf("error fopen() %s: %d (%m)\n",
+			     path, ret);
+		return ret;
+	}
+
+	ret = fscanf(file, "%u\t%*u\t%u", &inside_id, &length);
+	if (ret != 2) {
+		if (ferror(file))
+			ret = -errno;
+		else
+			ret = -EIO;
+
+		kdbus_printf("--- error fscanf(): %d\n", ret);
+		fclose(file);
+		return ret;
+	}
+
+	fclose(file);
+
+	/*
+	 * A length of 4294967295 (the invalid uid, (uid_t) -1) starting
+	 * at id 0 means that all uids/gids are mapped.
+	 */
+	if (inside_id == 0 && length == (uid_t) -1)
+		return 1;
+
+	return 0;
+}
+
+int all_uids_gids_are_mapped(void)
+{
+	int ret;
+
+	ret = all_ids_are_mapped("/proc/self/uid_map");
+	if (ret <= 0) {
+		kdbus_printf("--- error not all uids are mapped\n");
+		return 0;
+	}
+
+	ret = all_ids_are_mapped("/proc/self/gid_map");
+	if (ret <= 0) {
+		kdbus_printf("--- error not all gids are mapped\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+int drop_privileges(uid_t uid, gid_t gid)
+{
+	int ret;
+
+	ret = setgroups(0, NULL);
+	if (ret < 0) {
+		ret = -errno;
+		kdbus_printf("error setgroups: %d (%m)\n", ret);
+		return ret;
+	}
+
+	ret = setresgid(gid, gid, gid);
+	if (ret < 0) {
+		ret = -errno;
+		kdbus_printf("error setresgid: %d (%m)\n", ret);
+		return ret;
+	}
+
+	ret = setresuid(uid, uid, uid);
+	if (ret < 0) {
+		ret = -errno;
+		kdbus_printf("error setresuid: %d (%m)\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+
+uint64_t now(clockid_t clock)
+{
+	struct timespec spec;
+
+	clock_gettime(clock, &spec);
+	return spec.tv_sec * 1000ULL * 1000ULL * 1000ULL + spec.tv_nsec;
+}
+
+char *unique_name(const char *prefix)
+{
+	unsigned int i;
+	uint64_t u_now;
+	char n[17];
+	char *str;
+	int r;
+
+	/*
+	 * This returns a random string which is guaranteed to be
+	 * globally unique across all calls to unique_name(). We
+	 * compose the string as:
+	 *   <prefix>-<random>-<time>
+	 * With:
+	 *   <prefix>: string provided by the caller
+	 *   <random>: a random alpha string of 16 characters
+	 *   <time>: the current monotonic time in nanoseconds since boot
+	 *
+	 * The <random> part makes the string always look vastly different,
+	 * the <time> part makes sure no two calls return the same string.
+	 */
+
+	u_now = now(CLOCK_MONOTONIC);
+
+	for (i = 0; i < sizeof(n) - 1; ++i)
+		n[i] = 'a' + (rand() % ('z' - 'a'));
+	n[sizeof(n) - 1] = 0;
+
+	r = asprintf(&str, "%s-%s-%" PRIu64, prefix, n, u_now);
+	if (r < 0)
+		return NULL;
+
+	return str;
+}
+
+static int do_userns_map_id(pid_t pid,
+			    const char *map_file,
+			    const char *map_id)
+{
+	int ret;
+	int fd;
+	char *map;
+	unsigned int i;
+
+	map = strndupa(map_id, strlen(map_id));
+	if (!map) {
+		ret = -errno;
+		kdbus_printf("error strndupa %s: %d (%m)\n",
+			map_file, ret);
+		return ret;
+	}
+
+	for (i = 0; i < strlen(map); i++)
+		if (map[i] == ',')
+			map[i] = '\n';
+
+	fd = open(map_file, O_RDWR);
+	if (fd < 0) {
+		ret = -errno;
+		kdbus_printf("error open %s: %d (%m)\n",
+			map_file, ret);
+		return ret;
+	}
+
+	ret = write(fd, map, strlen(map));
+	if (ret < 0) {
+		ret = -errno;
+		kdbus_printf("error write to %s: %d (%m)\n",
+			     map_file, ret);
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	close(fd);
+	return ret;
+}
+
+int userns_map_uid_gid(pid_t pid,
+		       const char *map_uid,
+		       const char *map_gid)
+{
+	int fd, ret;
+	char file_id[128] = {'\0'};
+
+	snprintf(file_id, sizeof(file_id), "/proc/%ld/uid_map",
+		 (long) pid);
+
+	ret = do_userns_map_id(pid, file_id, map_uid);
+	if (ret < 0)
+		return ret;
+
+	snprintf(file_id, sizeof(file_id), "/proc/%ld/setgroups",
+		 (long) pid);
+
+	fd = open(file_id, O_WRONLY);
+	if (fd >= 0) {
+		write(fd, "deny\n", 5);
+		close(fd);
+	}
+
+	snprintf(file_id, sizeof(file_id), "/proc/%ld/gid_map",
+		 (long) pid);
+
+	return do_userns_map_id(pid, file_id, map_gid);
+}
+
+static int do_cap_get_flag(cap_t caps, cap_value_t cap)
+{
+	int ret;
+	cap_flag_value_t flag_set;
+
+	ret = cap_get_flag(caps, cap, CAP_EFFECTIVE, &flag_set);
+	if (ret < 0) {
+		ret = -errno;
+		kdbus_printf("error cap_get_flag(): %d (%m)\n", ret);
+		return ret;
+	}
+
+	return (flag_set == CAP_SET);
+}
+
+/*
+ * Returns:
+ *  1 if all the requested effective capabilities are set.
+ *  0 if at least one requested capability is missing; this value is
+ *    used to abort tests with TEST_SKIP.
+ *  Negative errno on failure.
+ *
+ *  The variadic capability list must be terminated with a negative value.
+ */
+int test_is_capable(int cap, ...)
+{
+	int ret;
+	va_list ap;
+	cap_t caps;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		ret = -errno;
+		kdbus_printf("error cap_get_proc(): %d (%m)\n", ret);
+		return ret;
+	}
+
+	ret = do_cap_get_flag(caps, (cap_value_t)cap);
+	if (ret <= 0)
+		goto out;
+
+	va_start(ap, cap);
+	while ((cap = va_arg(ap, int)) > 0) {
+		ret = do_cap_get_flag(caps, (cap_value_t)cap);
+		if (ret <= 0)
+			break;
+	}
+	va_end(ap);
+
+out:
+	cap_free(caps);
+	return ret;
+}
+
+int config_user_ns_is_enabled(void)
+{
+	return (access("/proc/self/uid_map", F_OK) == 0);
+}
+
+int config_auditsyscall_is_enabled(void)
+{
+	return (access("/proc/self/loginuid", F_OK) == 0);
+}
+
+int config_cgroups_is_enabled(void)
+{
+	return (access("/proc/self/cgroup", F_OK) == 0);
+}
+
+int config_security_is_enabled(void)
+{
+	int fd;
+	int ret;
+	char buf[128];
+
+	/* CONFIG_SECURITY is disabled */
+	if (access("/proc/self/attr/current", F_OK) != 0)
+		return 0;
+
+	/*
+	 * Only if read() fails with -EINVAL do we assume that
+	 * SECLABEL and LSM support are disabled.
+	 */
+	fd = open("/proc/self/attr/current", O_RDONLY|O_CLOEXEC);
+	if (fd < 0)
+		return 1;
+
+	ret = read(fd, buf, sizeof(buf));
+	if (ret == -1 && errno == EINVAL)
+		ret = 0;
+	else
+		ret = 1;
+
+	close(fd);
+
+	return ret;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/kdbus-util.h linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-util.h
--- linux-4.2/tools/testing/selftests/kdbus/kdbus-util.h	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/kdbus-util.h	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2013-2015 Kay Sievers
+ * Copyright (C) 2013-2015 Daniel Mack
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#pragma once
+
+#define BIT(X) (1 << (X))
+
+#include <time.h>
+#include <stdbool.h>
+#include <linux/kdbus.h>
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+#define ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0]))
+
+#define KDBUS_PTR(addr) ((void *)(uintptr_t)(addr))
+
+#define KDBUS_ALIGN8(l) (((l) + 7) & ~7)
+#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
+#define KDBUS_ITEM_SIZE(s) KDBUS_ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE)
+
+#define KDBUS_ITEM_NEXT(item) \
+	(typeof(item))((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+#define KDBUS_ITEM_FOREACH(item, head, first)				\
+	for ((item) = (head)->first;					\
+	     ((uint8_t *)(item) < (uint8_t *)(head) + (head)->size) &&	\
+	       ((uint8_t *)(item) >= (uint8_t *)(head));		\
+	     (item) = KDBUS_ITEM_NEXT(item))
+#define KDBUS_FOREACH(iter, first, _size)				\
+	for ((iter) = (first);						\
+	     ((uint8_t *)(iter) < (uint8_t *)(first) + (_size)) &&	\
+	       ((uint8_t *)(iter) >= (uint8_t *)(first));		\
+	     (iter) = (void *)((uint8_t *)(iter) + KDBUS_ALIGN8((iter)->size)))
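+
+/*
+ * Example use of the item iterators (as in the tests below):
+ *
+ *	KDBUS_ITEM_FOREACH(item, info, items)
+ *		if (item->type == KDBUS_ITEM_MAKE_NAME)
+ *			return item;
+ */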
+
+#define _KDBUS_ATTACH_BITS_SET_NR (__builtin_popcountll(_KDBUS_ATTACH_ALL))
+
+/* Sum of KDBUS_ITEM_* that reflects _KDBUS_ATTACH_ALL */
+#define KDBUS_ATTACH_ITEMS_TYPE_SUM					\
+	((((_KDBUS_ATTACH_BITS_SET_NR - 1) *				\
+	((_KDBUS_ATTACH_BITS_SET_NR - 1) + 1)) / 2) +			\
+	(_KDBUS_ITEM_ATTACH_BASE * _KDBUS_ATTACH_BITS_SET_NR))
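+
+/*
+ * Note: with N = _KDBUS_ATTACH_BITS_SET_NR consecutive attach item types
+ * starting at _KDBUS_ITEM_ATTACH_BASE, the sum above is the arithmetic
+ * series base * N + (0 + 1 + ... + (N - 1)) = base * N + N * (N - 1) / 2.
+ */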
+
+#define POOL_SIZE (16 * 1024LU * 1024LU)
+
+#define UNPRIV_UID 65534
+#define UNPRIV_GID 65534
+
+/* Dump as the user of the process; useful for user-namespace testing */
+#define SUID_DUMP_USER	1
+
+extern int kdbus_util_verbose;
+
+#define kdbus_printf(X...) \
+	if (kdbus_util_verbose) \
+		printf(X)
+
+#define RUN_UNPRIVILEGED(child_uid, child_gid, _child_, _parent_) ({	\
+		pid_t pid, rpid;					\
+		int ret;						\
+									\
+		pid = fork();						\
+		if (pid == 0) {						\
+			ret = drop_privileges(child_uid, child_gid);	\
+			ASSERT_EXIT_VAL(ret == 0, ret);			\
+									\
+			_child_;					\
+			_exit(0);					\
+		} else if (pid > 0) {					\
+			_parent_;					\
+			rpid = waitpid(pid, &ret, 0);			\
+			ASSERT_RETURN(rpid == pid);			\
+			ASSERT_RETURN(WIFEXITED(ret));			\
+			ASSERT_RETURN(WEXITSTATUS(ret) == 0);		\
+			ret = TEST_OK;					\
+		} else {						\
+			ret = pid;					\
+		}							\
+									\
+		ret;							\
+	})
+
+#define RUN_UNPRIVILEGED_CONN(_var_, _bus_, _code_)			\
+	RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_GID, ({			\
+		struct kdbus_conn *_var_;				\
+		_var_ = kdbus_hello(_bus_, 0, NULL, 0);			\
+		ASSERT_EXIT(_var_);					\
+		_code_;							\
+		kdbus_conn_free(_var_);					\
+	}), ({ 0; }))
+
+#define RUN_CLONE_CHILD(clone_ret, flags, _setup_, _child_body_,	\
+			_parent_setup_, _parent_body_) ({		\
+	pid_t pid, rpid;						\
+	int ret;							\
+	int efd = -1;							\
+									\
+	_setup_;							\
+	efd = eventfd(0, EFD_CLOEXEC);					\
+	ASSERT_RETURN(efd >= 0);					\
+	*(clone_ret) = 0;						\
+	pid = syscall(__NR_clone, flags, NULL);				\
+	if (pid == 0) {							\
+		eventfd_t event_status = 0;				\
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);			\
+		ASSERT_EXIT(ret == 0);					\
+		ret = eventfd_read(efd, &event_status);			\
+		if (ret < 0 || event_status != 1) {			\
+			kdbus_printf("error eventfd_read()\n");		\
+			_exit(EXIT_FAILURE);				\
+		}							\
+		_child_body_;						\
+		_exit(0);						\
+	} else if (pid > 0) {						\
+		_parent_setup_;						\
+		ret = eventfd_write(efd, 1);				\
+		ASSERT_RETURN(ret >= 0);				\
+		_parent_body_;						\
+		rpid = waitpid(pid, &ret, 0);				\
+		ASSERT_RETURN(rpid == pid);				\
+		ASSERT_RETURN(WIFEXITED(ret));				\
+		ASSERT_RETURN(WEXITSTATUS(ret) == 0);			\
+		ret = TEST_OK;						\
+	} else {							\
+		ret = -errno;						\
+		*(clone_ret) = -errno;					\
+	}								\
+	close(efd);							\
+	ret;								\
+})
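+
+/*
+ * Note on RUN_CLONE_CHILD: the eventfd acts as a start barrier: the
+ * clone()d child blocks in eventfd_read() until the parent has executed
+ * _parent_setup_ and signalled it via eventfd_write(efd, 1); only then
+ * does _child_body_ run.
+ */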
+
+/* Enum describing whether the parent should drop privileges or not */
+enum kdbus_drop_parent {
+	DO_NOT_DROP,
+	DROP_SAME_UNPRIV,
+	DROP_OTHER_UNPRIV,
+};
+
+struct kdbus_conn {
+	int fd;
+	uint64_t id;
+	unsigned char *buf;
+};
+
+int kdbus_sysfs_get_parameter_mask(const char *path, uint64_t *mask);
+int kdbus_sysfs_set_parameter_mask(const char *path, uint64_t mask);
+
+int sys_memfd_create(const char *name, __u64 size);
+int sys_memfd_seal_set(int fd);
+off_t sys_memfd_get_size(int fd, off_t *size);
+
+int kdbus_list(struct kdbus_conn *conn, uint64_t flags);
+int kdbus_name_release(struct kdbus_conn *conn, const char *name);
+int kdbus_name_acquire(struct kdbus_conn *conn, const char *name,
+		       uint64_t *flags);
+void kdbus_msg_free(struct kdbus_msg *msg);
+int kdbus_msg_recv(struct kdbus_conn *conn,
+		   struct kdbus_msg **msg, uint64_t *offset);
+int kdbus_msg_recv_poll(struct kdbus_conn *conn, int timeout_ms,
+			struct kdbus_msg **msg_out, uint64_t *offset);
+int kdbus_free(const struct kdbus_conn *conn, uint64_t offset);
+int kdbus_msg_dump(const struct kdbus_conn *conn,
+		   const struct kdbus_msg *msg);
+int kdbus_create_bus(int control_fd, const char *name,
+		     uint64_t owner_meta, char **path);
+int kdbus_msg_send(const struct kdbus_conn *conn, const char *name,
+		   uint64_t cookie, uint64_t flags, uint64_t timeout,
+		   int64_t priority, uint64_t dst_id);
+int kdbus_msg_send_sync(const struct kdbus_conn *conn, const char *name,
+			uint64_t cookie, uint64_t flags, uint64_t timeout,
+			int64_t priority, uint64_t dst_id, int cancel_fd);
+int kdbus_msg_send_reply(const struct kdbus_conn *conn,
+			 uint64_t reply_cookie,
+			 uint64_t dst_id);
+struct kdbus_conn *kdbus_hello(const char *path, uint64_t hello_flags,
+			       const struct kdbus_item *item,
+			       size_t item_size);
+struct kdbus_conn *kdbus_hello_registrar(const char *path, const char *name,
+					 const struct kdbus_policy_access *access,
+					 size_t num_access, uint64_t flags);
+struct kdbus_conn *kdbus_hello_activator(const char *path, const char *name,
+					 const struct kdbus_policy_access *access,
+					 size_t num_access);
+bool kdbus_item_in_message(struct kdbus_msg *msg, uint64_t type);
+int kdbus_bus_creator_info(struct kdbus_conn *conn,
+			   uint64_t flags,
+			   uint64_t *offset);
+int kdbus_conn_info(struct kdbus_conn *conn, uint64_t id,
+		    const char *name, uint64_t flags, uint64_t *offset);
+void kdbus_conn_free(struct kdbus_conn *conn);
+int kdbus_conn_update_attach_flags(struct kdbus_conn *conn,
+				   uint64_t attach_flags_send,
+				   uint64_t attach_flags_recv);
+int kdbus_conn_update_policy(struct kdbus_conn *conn, const char *name,
+			     const struct kdbus_policy_access *access,
+			     size_t num_access);
+
+int kdbus_add_match_id(struct kdbus_conn *conn, uint64_t cookie,
+		       uint64_t type, uint64_t id);
+int kdbus_add_match_empty(struct kdbus_conn *conn);
+
+int all_uids_gids_are_mapped(void);
+int drop_privileges(uid_t uid, gid_t gid);
+uint64_t now(clockid_t clock);
+char *unique_name(const char *prefix);
+
+int userns_map_uid_gid(pid_t pid, const char *map_uid, const char *map_gid);
+int test_is_capable(int cap, ...);
+int config_user_ns_is_enabled(void);
+int config_auditsyscall_is_enabled(void);
+int config_cgroups_is_enabled(void);
+int config_security_is_enabled(void);
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-activator.c linux-4.2-pck/tools/testing/selftests/kdbus/test-activator.c
--- linux-4.2/tools/testing/selftests/kdbus/test-activator.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-activator.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,321 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+static int kdbus_starter_poll(struct kdbus_conn *conn)
+{
+	int ret;
+	struct pollfd fd;
+
+	fd.fd = conn->fd;
+	fd.events = POLLIN | POLLPRI | POLLHUP;
+	fd.revents = 0;
+
+	ret = poll(&fd, 1, 100);
+	if (ret == 0)
+		return -ETIMEDOUT;
+	else if (ret > 0) {
+		if (fd.revents & POLLIN)
+			return 0;
+
+		if (fd.revents & (POLLHUP | POLLERR))
+			ret = -ECONNRESET;
+	}
+
+	return ret;
+}
+
+/* Ensure that kdbus activator logic is safe */
+static int kdbus_priv_activator(struct kdbus_test_env *env)
+{
+	int ret;
+	struct kdbus_msg *msg = NULL;
+	uint64_t cookie = 0xdeadbeef;
+	uint64_t flags;
+	struct kdbus_conn *activator;
+	struct kdbus_conn *service;
+	struct kdbus_conn *client;
+	struct kdbus_conn *holder;
+	struct kdbus_policy_access *access;
+
+	access = (struct kdbus_policy_access[]){
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = getuid(),
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = getuid(),
+			.access = KDBUS_POLICY_TALK,
+		},
+	};
+
+	activator = kdbus_hello_activator(env->buspath, "foo.priv.activator",
+					  access, 2);
+	ASSERT_RETURN(activator);
+
+	service = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(service);
+
+	client = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(client);
+
+	/*
+	 * Make sure that other users can't TALK to the activator
+	 */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		/* Try to talk using the ID */
+		ret = kdbus_msg_send(unpriv, NULL, 0xdeadbeef, 0, 0,
+				     0, activator->id);
+		ASSERT_EXIT(ret == -ENXIO);
+
+		/* Try to talk to the name */
+		ret = kdbus_msg_send(unpriv, "foo.priv.activator",
+				     0xdeadbeef, 0, 0, 0,
+				     KDBUS_DST_ID_NAME);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure that the activator did not receive anything,
+	 * so the service will not be started automatically
+	 */
+
+	ret = kdbus_starter_poll(activator);
+	ASSERT_RETURN(ret == -ETIMEDOUT);
+
+	/*
+	 * Now try to emulate the starter/service logic and
+	 * acquire the name.
+	 */
+
+	cookie++;
+	ret = kdbus_msg_send(service, "foo.priv.activator", cookie,
+			     0, 0, 0, KDBUS_DST_ID_NAME);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_starter_poll(activator);
+	ASSERT_RETURN(ret == 0);
+
+	/* Policies are still checked, access denied */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		flags = KDBUS_NAME_REPLACE_EXISTING;
+		ret = kdbus_name_acquire(unpriv, "foo.priv.activator",
+					 &flags);
+		ASSERT_RETURN(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	flags = KDBUS_NAME_REPLACE_EXISTING;
+	ret = kdbus_name_acquire(service, "foo.priv.activator",
+				 &flags);
+	ASSERT_RETURN(ret == 0);
+
+	/* We read our previous starter message */
+
+	ret = kdbus_msg_recv_poll(service, 100, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* Try to talk, we still fail */
+
+	cookie++;
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		/* Try to talk to the name */
+		ret = kdbus_msg_send(unpriv, "foo.priv.activator",
+				     cookie, 0, 0, 0,
+				     KDBUS_DST_ID_NAME);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/* Still nothing to read */
+
+	ret = kdbus_msg_recv_poll(service, 100, NULL, NULL);
+	ASSERT_RETURN(ret == -ETIMEDOUT);
+
+	/* We receive everything now */
+
+	cookie++;
+	ret = kdbus_msg_send(client, "foo.priv.activator", cookie,
+			     0, 0, 0, KDBUS_DST_ID_NAME);
+	ASSERT_RETURN(ret == 0);
+	ret = kdbus_msg_recv_poll(service, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	/* Policies default to deny TALK now */
+	kdbus_conn_free(activator);
+
+	cookie++;
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		/* Try to talk to the name */
+		ret = kdbus_msg_send(unpriv, "foo.priv.activator",
+				     cookie, 0, 0, 0,
+				     KDBUS_DST_ID_NAME);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(service, 100, NULL, NULL);
+	ASSERT_RETURN(ret == -ETIMEDOUT);
+
+	/* Same user is able to TALK */
+	cookie++;
+	ret = kdbus_msg_send(client, "foo.priv.activator", cookie,
+			     0, 0, 0, KDBUS_DST_ID_NAME);
+	ASSERT_RETURN(ret == 0);
+	ret = kdbus_msg_recv_poll(service, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	access = (struct kdbus_policy_access []){
+		{
+			.type = KDBUS_POLICY_ACCESS_WORLD,
+			.id = getuid(),
+			.access = KDBUS_POLICY_TALK,
+		},
+	};
+
+	holder = kdbus_hello_registrar(env->buspath, "foo.priv.activator",
+				       access, 1, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(holder);
+
+	/* Now we are able to TALK to the name */
+
+	cookie++;
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		/* Try to talk to the name */
+		ret = kdbus_msg_send(unpriv, "foo.priv.activator",
+				     cookie, 0, 0, 0,
+				     KDBUS_DST_ID_NAME);
+		ASSERT_EXIT(ret == 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(service, 100, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		flags = KDBUS_NAME_REPLACE_EXISTING;
+		ret = kdbus_name_acquire(unpriv, "foo.priv.activator",
+					 &flags);
+		ASSERT_RETURN(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	kdbus_conn_free(service);
+	kdbus_conn_free(client);
+	kdbus_conn_free(holder);
+
+	return 0;
+}
+
+int kdbus_test_activator(struct kdbus_test_env *env)
+{
+	int ret;
+	struct kdbus_conn *activator;
+	struct pollfd fds[2];
+	bool activator_done = false;
+	struct kdbus_policy_access access[2];
+
+	access[0].type = KDBUS_POLICY_ACCESS_USER;
+	access[0].id = getuid();
+	access[0].access = KDBUS_POLICY_OWN;
+
+	access[1].type = KDBUS_POLICY_ACCESS_WORLD;
+	access[1].access = KDBUS_POLICY_TALK;
+
+	activator = kdbus_hello_activator(env->buspath, "foo.test.activator",
+					  access, 2);
+	ASSERT_RETURN(activator);
+
+	ret = kdbus_add_match_empty(env->conn);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_list(env->conn, KDBUS_LIST_NAMES |
+				    KDBUS_LIST_UNIQUE |
+				    KDBUS_LIST_ACTIVATORS |
+				    KDBUS_LIST_QUEUED);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_send(env->conn, "foo.test.activator", 0xdeafbeef,
+			     0, 0, 0, KDBUS_DST_ID_NAME);
+	ASSERT_RETURN(ret == 0);
+
+	fds[0].fd = activator->fd;
+	fds[1].fd = env->conn->fd;
+
+	kdbus_printf("-- entering poll loop ...\n");
+
+	for (;;) {
+		int i, nfds = sizeof(fds) / sizeof(fds[0]);
+
+		for (i = 0; i < nfds; i++) {
+			fds[i].events = POLLIN | POLLPRI;
+			fds[i].revents = 0;
+		}
+
+		ret = poll(fds, nfds, 3000);
+		ASSERT_RETURN(ret >= 0);
+
+		ret = kdbus_list(env->conn, KDBUS_LIST_NAMES);
+		ASSERT_RETURN(ret == 0);
+
+		if ((fds[0].revents & POLLIN) && !activator_done) {
+			uint64_t flags = KDBUS_NAME_REPLACE_EXISTING;
+
+			kdbus_printf("Starter was called back!\n");
+
+			ret = kdbus_name_acquire(env->conn,
+						 "foo.test.activator", &flags);
+			ASSERT_RETURN(ret == 0);
+
+			activator_done = true;
+		}
+
+		if (fds[1].revents & POLLIN) {
+			kdbus_msg_recv(env->conn, NULL, NULL);
+			break;
+		}
+	}
+
+	/* Check if all uids/gids are mapped */
+	if (!all_uids_gids_are_mapped())
+		return TEST_SKIP;
+
+	/* Check capabilities only now, so that the previous tests still run */
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	if (!ret)
+		return TEST_SKIP;
+
+	ret = kdbus_priv_activator(env);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_conn_free(activator);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-benchmark.c linux-4.2-pck/tools/testing/selftests/kdbus/test-benchmark.c
--- linux-4.2/tools/testing/selftests/kdbus/test-benchmark.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-benchmark.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,451 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <math.h>
+
+#include "kdbus-api.h"
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+#define SERVICE_NAME "foo.bar.echo"
+
+/*
+ * To get a benchmark comparison with a unix socket, set:
+ * use_memfd	= false;
+ * compare_uds	= true;
+ * attach_none	= true;		do not attach metadata
+ */
+
+static bool use_memfd = true;		/* transmit memfd? */
+static bool compare_uds = false;		/* unix-socket comparison? */
+static bool attach_none = false;		/* clear attach-flags? */
+static char stress_payload[8192];
+
+struct stats {
+	uint64_t count;
+	uint64_t latency_acc;
+	uint64_t latency_low;
+	uint64_t latency_high;
+	uint64_t latency_avg;
+	uint64_t latency_ssquares;
+};
+
+static struct stats stats;
+
+static void reset_stats(void)
+{
+	stats.count = 0;
+	stats.latency_acc = 0;
+	stats.latency_low = UINT64_MAX;
+	stats.latency_high = 0;
+	stats.latency_avg = 0;
+	stats.latency_ssquares = 0;
+}
+
+static void dump_stats(bool is_uds)
+{
+	if (stats.count > 0) {
+		kdbus_printf("stats %s: %'llu packets processed, latency (nsecs) min/max/avg/dev %'7llu // %'7llu // %'7llu // %'7.f\n",
+			     is_uds ? " (UNIX)" : "(KDBUS)",
+			     (unsigned long long) stats.count,
+			     (unsigned long long) stats.latency_low,
+			     (unsigned long long) stats.latency_high,
+			     (unsigned long long) stats.latency_avg,
+			     sqrt(stats.latency_ssquares / stats.count));
+	} else {
+		kdbus_printf("*** no packets received. bus stuck?\n");
+	}
+}
+
+static void add_stats(uint64_t prev)
+{
+	uint64_t diff, latency_avg_prev;
+
+	diff = now(CLOCK_THREAD_CPUTIME_ID) - prev;
+
+	stats.count++;
+	stats.latency_acc += diff;
+
+	/*
+	 * Welford's online variance (Welford, 1962): track the running mean
+	 * and the sum of squared differences so dump_stats() can report the
+	 * deviation as sqrt(latency_ssquares / count).
+	 */
+	latency_avg_prev = stats.latency_avg;
+	stats.latency_avg = stats.latency_acc / stats.count;
+	stats.latency_ssquares += (diff - latency_avg_prev) * (diff - stats.latency_avg);
+
+	if (stats.latency_low > diff)
+		stats.latency_low = diff;
+
+	if (stats.latency_high < diff)
+		stats.latency_high = diff;
+}
+
+static int setup_simple_kdbus_msg(struct kdbus_conn *conn,
+				  uint64_t dst_id,
+				  struct kdbus_msg **msg_out)
+{
+	struct kdbus_msg *msg;
+	struct kdbus_item *item;
+	uint64_t size;
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	msg = malloc(size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = conn->id;
+	msg->dst_id = dst_id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	item = msg->items;
+
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t) stress_payload;
+	item->vec.size = sizeof(stress_payload);
+	item = KDBUS_ITEM_NEXT(item);
+
+	*msg_out = msg;
+
+	return 0;
+}
+
+static int setup_memfd_kdbus_msg(struct kdbus_conn *conn,
+				 uint64_t dst_id,
+				 off_t *memfd_item_offset,
+				 struct kdbus_msg **msg_out)
+{
+	struct kdbus_msg *msg;
+	struct kdbus_item *item;
+	uint64_t size;
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_memfd));
+
+	msg = malloc(size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = conn->id;
+	msg->dst_id = dst_id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	item = msg->items;
+
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t) stress_payload;
+	item->vec.size = sizeof(stress_payload);
+	item = KDBUS_ITEM_NEXT(item);
+
+	item->type = KDBUS_ITEM_PAYLOAD_MEMFD;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_memfd);
+	item->memfd.size = sizeof(uint64_t);
+
+	*memfd_item_offset = (unsigned char *)item - (unsigned char *)msg;
+	*msg_out = msg;
+
+	return 0;
+}
+
+static int
+send_echo_request(struct kdbus_conn *conn, uint64_t dst_id,
+		  void *kdbus_msg, off_t memfd_item_offset)
+{
+	struct kdbus_cmd_send cmd = {};
+	int memfd = -1;
+	int ret;
+
+	if (use_memfd) {
+		uint64_t now_ns = now(CLOCK_THREAD_CPUTIME_ID);
+		struct kdbus_item *item = memfd_item_offset + kdbus_msg;
+		memfd = sys_memfd_create("memfd-name", 0);
+		ASSERT_RETURN_VAL(memfd >= 0, memfd);
+
+		ret = write(memfd, &now_ns, sizeof(now_ns));
+		ASSERT_RETURN_VAL(ret == sizeof(now_ns), -EAGAIN);
+
+		ret = sys_memfd_seal_set(memfd);
+		ASSERT_RETURN_VAL(ret == 0, -errno);
+
+		item->memfd.fd = memfd;
+	}
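+
+	/*
+	 * Note: with memfds the send timestamp travels inside the memfd
+	 * itself; handle_echo_reply() maps it back and passes it to
+	 * add_stats(), so the measured latency covers the full round trip.
+	 */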
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)kdbus_msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	close(memfd);
+
+	return 0;
+}
+
+static int
+handle_echo_reply(struct kdbus_conn *conn, uint64_t send_ns)
+{
+	int ret;
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+	struct kdbus_msg *msg;
+	const struct kdbus_item *item;
+	bool has_memfd = false;
+
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	if (ret == -EAGAIN)
+		return ret;
+
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	if (!use_memfd)
+		goto out;
+
+	msg = (struct kdbus_msg *)(conn->buf + recv.msg.offset);
+
+	KDBUS_ITEM_FOREACH(item, msg, items) {
+		switch (item->type) {
+		case KDBUS_ITEM_PAYLOAD_MEMFD: {
+			char *buf;
+
+			buf = mmap(NULL, item->memfd.size, PROT_READ,
+				   MAP_PRIVATE, item->memfd.fd, 0);
+			ASSERT_RETURN_VAL(buf != MAP_FAILED, -EINVAL);
+			ASSERT_RETURN_VAL(item->memfd.size == sizeof(uint64_t),
+					  -EINVAL);
+
+			add_stats(*(uint64_t*)buf);
+			munmap(buf, item->memfd.size);
+			close(item->memfd.fd);
+			has_memfd = true;
+			break;
+		}
+
+		case KDBUS_ITEM_PAYLOAD_OFF:
+			/* ignore */
+			break;
+		}
+	}
+
+out:
+	if (!has_memfd)
+		add_stats(send_ns);
+
+	ret = kdbus_free(conn, recv.msg.offset);
+	ASSERT_RETURN_VAL(ret == 0, -errno);
+
+	return 0;
+}
+
+static int benchmark(struct kdbus_test_env *env)
+{
+	static char buf[sizeof(stress_payload)];
+	struct kdbus_msg *kdbus_msg = NULL;
+	off_t memfd_cached_offset = 0;
+	int ret;
+	struct kdbus_conn *conn_a, *conn_b;
+	struct pollfd fds[2];
+	uint64_t start, send_ns, now_ns, diff;
+	unsigned int i;
+	int uds[2];
+
+	setlocale(LC_ALL, "");
+
+	for (i = 0; i < sizeof(stress_payload); i++)
+		stress_payload[i] = i;
+
+	/* setup kdbus pair */
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	ret = kdbus_add_match_empty(conn_a);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_empty(conn_b);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(conn_a, SERVICE_NAME, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	if (attach_none) {
+		ret = kdbus_conn_update_attach_flags(conn_a,
+						     _KDBUS_ATTACH_ALL,
+						     0);
+		ASSERT_RETURN(ret == 0);
+	}
+
+	/* setup UDS pair */
+
+	ret = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK, 0, uds);
+	ASSERT_RETURN(ret == 0);
+
+	/* setup a kdbus msg now */
+	if (use_memfd) {
+		ret = setup_memfd_kdbus_msg(conn_b, conn_a->id,
+					    &memfd_cached_offset,
+					    &kdbus_msg);
+		ASSERT_RETURN(ret == 0);
+	} else {
+		ret = setup_simple_kdbus_msg(conn_b, conn_a->id, &kdbus_msg);
+		ASSERT_RETURN(ret == 0);
+	}
+
+	/* start benchmark */
+
+	kdbus_printf("-- entering poll loop ...\n");
+
+	do {
+		/* run kdbus benchmark */
+		fds[0].fd = conn_a->fd;
+		fds[1].fd = conn_b->fd;
+
+		/* cancel any pending message */
+		handle_echo_reply(conn_a, 0);
+
+		start = now(CLOCK_THREAD_CPUTIME_ID);
+		reset_stats();
+
+		send_ns = now(CLOCK_THREAD_CPUTIME_ID);
+		ret = send_echo_request(conn_b, conn_a->id,
+					kdbus_msg, memfd_cached_offset);
+		ASSERT_RETURN(ret == 0);
+
+		while (1) {
+			unsigned int nfds = sizeof(fds) / sizeof(fds[0]);
+			unsigned int i;
+
+			for (i = 0; i < nfds; i++) {
+				fds[i].events = POLLIN | POLLPRI | POLLHUP;
+				fds[i].revents = 0;
+			}
+
+			ret = poll(fds, nfds, 10);
+			if (ret < 0)
+				break;
+
+			if (fds[0].revents & POLLIN) {
+				ret = handle_echo_reply(conn_a, send_ns);
+				ASSERT_RETURN(ret == 0);
+
+				send_ns = now(CLOCK_THREAD_CPUTIME_ID);
+				ret = send_echo_request(conn_b, conn_a->id,
+							kdbus_msg,
+							memfd_cached_offset);
+				ASSERT_RETURN(ret == 0);
+			}
+
+			now_ns = now(CLOCK_THREAD_CPUTIME_ID);
+			diff = now_ns - start;
+			if (diff > 1000000000ULL) {
+				start = now_ns;
+
+				dump_stats(false);
+				break;
+			}
+		}
+
+		if (!compare_uds)
+			continue;
+
+		/* run unix-socket benchmark as comparison */
+
+		fds[0].fd = uds[0];
+		fds[1].fd = uds[1];
+
+		/* cancel any pending message */
+		read(uds[1], buf, sizeof(buf));
+
+		start = now(CLOCK_THREAD_CPUTIME_ID);
+		reset_stats();
+
+		send_ns = now(CLOCK_THREAD_CPUTIME_ID);
+		ret = write(uds[0], stress_payload, sizeof(stress_payload));
+		ASSERT_RETURN(ret == sizeof(stress_payload));
+
+		while (1) {
+			unsigned int nfds = sizeof(fds) / sizeof(fds[0]);
+			unsigned int i;
+
+			for (i = 0; i < nfds; i++) {
+				fds[i].events = POLLIN | POLLPRI | POLLHUP;
+				fds[i].revents = 0;
+			}
+
+			ret = poll(fds, nfds, 10);
+			if (ret < 0)
+				break;
+
+			if (fds[1].revents & POLLIN) {
+				ret = read(uds[1], buf, sizeof(buf));
+				ASSERT_RETURN(ret == sizeof(buf));
+
+				add_stats(send_ns);
+
+				send_ns = now(CLOCK_THREAD_CPUTIME_ID);
+				ret = write(uds[0], buf, sizeof(buf));
+				ASSERT_RETURN(ret == sizeof(buf));
+			}
+
+			now_ns = now(CLOCK_THREAD_CPUTIME_ID);
+			diff = now_ns - start;
+			if (diff > 1000000000ULL) {
+				start = now_ns;
+
+				dump_stats(true);
+				break;
+			}
+		}
+
+	} while (kdbus_util_verbose);
+
+	kdbus_printf("-- closing bus connections\n");
+
+	free(kdbus_msg);
+
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	return (stats.count > 1) ? TEST_OK : TEST_ERR;
+}
+
+int kdbus_test_benchmark(struct kdbus_test_env *env)
+{
+	use_memfd = true;
+	attach_none = false;
+	compare_uds = false;
+	return benchmark(env);
+}
+
+int kdbus_test_benchmark_nomemfds(struct kdbus_test_env *env)
+{
+	use_memfd = false;
+	attach_none = false;
+	compare_uds = false;
+	return benchmark(env);
+}
+
+int kdbus_test_benchmark_uds(struct kdbus_test_env *env)
+{
+	use_memfd = false;
+	attach_none = true;
+	compare_uds = true;
+	return benchmark(env);
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-bus.c linux-4.2-pck/tools/testing/selftests/kdbus/test-bus.c
--- linux-4.2/tools/testing/selftests/kdbus/test-bus.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-bus.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,175 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+static struct kdbus_item *kdbus_get_item(struct kdbus_info *info,
+					 uint64_t type)
+{
+	struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, info, items)
+		if (item->type == type)
+			return item;
+
+	return NULL;
+}
+
+static int test_bus_creator_info(const char *bus_path)
+{
+	int ret;
+	uint64_t offset;
+	struct kdbus_conn *conn;
+	struct kdbus_info *info;
+	struct kdbus_item *item;
+	char *tmp, *busname;
+
+	/* extract the bus-name from @bus_path */
+	tmp = strdup(bus_path);
+	ASSERT_RETURN(tmp);
+	busname = strrchr(tmp, '/');
+	ASSERT_RETURN(busname);
+	*busname = 0;
+	busname = strrchr(tmp, '/');
+	ASSERT_RETURN(busname);
+	++busname;
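+	/*
+	 * Illustration (bus path format as created in kdbus_test_bus_make()
+	 * below): "<root>/<uid>-<name>-1/bus" yields busname "<uid>-<name>-1".
+	 */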
+
+	conn = kdbus_hello(bus_path, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	ret = kdbus_bus_creator_info(conn, _KDBUS_ATTACH_ALL, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(conn->buf + offset);
+
+	item = kdbus_get_item(info, KDBUS_ITEM_MAKE_NAME);
+	ASSERT_RETURN(item);
+	ASSERT_RETURN(!strcmp(item->str, busname));
+
+	ret = kdbus_free(conn, offset);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	free(tmp);
+	kdbus_conn_free(conn);
+	return 0;
+}
+
+int kdbus_test_bus_make(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd cmd;
+
+		/* bloom size item */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_bloom_parameter bloom;
+		} bs;
+
+		/* name item */
+		uint64_t n_size;
+		uint64_t n_type;
+		char name[64];
+	} bus_make;
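+	/*
+	 * Note: bus_make is laid out as a kdbus_cmd header immediately
+	 * followed by two items (bloom parameter and make-name), so the
+	 * whole struct can be passed to kdbus_cmd_bus_make() in one piece.
+	 */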
+	char s[PATH_MAX], *name;
+	int ret, control_fd2;
+	uid_t uid;
+
+	name = unique_name("");
+	ASSERT_RETURN(name);
+
+	snprintf(s, sizeof(s), "%s/control", env->root);
+	env->control_fd = open(s, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(env->control_fd >= 0);
+
+	control_fd2 = open(s, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(control_fd2 >= 0);
+
+	memset(&bus_make, 0, sizeof(bus_make));
+
+	bus_make.bs.size = sizeof(bus_make.bs);
+	bus_make.bs.type = KDBUS_ITEM_BLOOM_PARAMETER;
+	bus_make.bs.bloom.size = 64;
+	bus_make.bs.bloom.n_hash = 1;
+
+	bus_make.n_type = KDBUS_ITEM_MAKE_NAME;
+
+	uid = getuid();
+
+	/* missing uid prefix */
+	snprintf(bus_make.name, sizeof(bus_make.name), "foo");
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(env->control_fd, &bus_make.cmd);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* non alphanumeric character */
+	snprintf(bus_make.name, sizeof(bus_make.name), "%u-blah@123", uid);
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(env->control_fd, &bus_make.cmd);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* '-' at the end */
+	snprintf(bus_make.name, sizeof(bus_make.name), "%u-blah-", uid);
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(env->control_fd, &bus_make.cmd);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* create a new bus */
+	snprintf(bus_make.name, sizeof(bus_make.name), "%u-%s-1", uid, name);
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(env->control_fd, &bus_make.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_cmd_bus_make(control_fd2, &bus_make.cmd);
+	ASSERT_RETURN(ret == -EEXIST);
+
+	snprintf(s, sizeof(s), "%s/%u-%s-1/bus", env->root, uid, name);
+	ASSERT_RETURN(access(s, F_OK) == 0);
+
+	ret = test_bus_creator_info(s);
+	ASSERT_RETURN(ret == 0);
+
+	/* can't use the same fd for bus make twice, even though a different
+	 * bus name is used
+	 */
+	snprintf(bus_make.name, sizeof(bus_make.name), "%u-%s-2", uid, name);
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(env->control_fd, &bus_make.cmd);
+	ASSERT_RETURN(ret == -EBADFD);
+
+	/* create a new bus, with different fd and different bus name */
+	snprintf(bus_make.name, sizeof(bus_make.name), "%u-%s-2", uid, name);
+	bus_make.n_size = KDBUS_ITEM_HEADER_SIZE + strlen(bus_make.name) + 1;
+	bus_make.cmd.size = sizeof(struct kdbus_cmd) +
+			    sizeof(bus_make.bs) + bus_make.n_size;
+	ret = kdbus_cmd_bus_make(control_fd2, &bus_make.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	close(control_fd2);
+	free(name);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-chat.c linux-4.2-pck/tools/testing/selftests/kdbus/test-chat.c
--- linux-4.2/tools/testing/selftests/kdbus/test-chat.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-chat.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+int kdbus_test_chat(struct kdbus_test_env *env)
+{
+	int ret, cookie;
+	struct kdbus_conn *conn_a, *conn_b;
+	struct pollfd fds[2];
+	uint64_t flags;
+	int count;
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	flags = KDBUS_NAME_ALLOW_REPLACEMENT;
+	ret = kdbus_name_acquire(conn_a, "foo.bar.test", &flags);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(conn_a, "foo.bar.baz", NULL);
+	ASSERT_RETURN(ret == 0);
+
+	flags = KDBUS_NAME_QUEUE;
+	ret = kdbus_name_acquire(conn_b, "foo.bar.baz", &flags);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(conn_a, "foo.bar.double", NULL);
+	ASSERT_RETURN(ret == 0);
+
+	flags = 0;
+	ret = kdbus_name_acquire(conn_a, "foo.bar.double", &flags);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(!(flags & KDBUS_NAME_ACQUIRED));
+
+	ret = kdbus_name_release(conn_a, "foo.bar.double");
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_release(conn_a, "foo.bar.double");
+	ASSERT_RETURN(ret == -ESRCH);
+
+	ret = kdbus_list(conn_b, KDBUS_LIST_UNIQUE |
+				 KDBUS_LIST_NAMES  |
+				 KDBUS_LIST_QUEUED |
+				 KDBUS_LIST_ACTIVATORS);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_empty(conn_a);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_empty(conn_b);
+	ASSERT_RETURN(ret == 0);
+
+	cookie = 0;
+	ret = kdbus_msg_send(conn_b, NULL, 0xc0000000 | cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	fds[0].fd = conn_a->fd;
+	fds[1].fd = conn_b->fd;
+
+	kdbus_printf("-- entering poll loop ...\n");
+
+	for (count = 0;; count++) {
+		int i, nfds = sizeof(fds) / sizeof(fds[0]);
+
+		for (i = 0; i < nfds; i++) {
+			fds[i].events = POLLIN | POLLPRI | POLLHUP;
+			fds[i].revents = 0;
+		}
+
+		ret = poll(fds, nfds, 3000);
+		ASSERT_RETURN(ret >= 0);
+
+		if (fds[0].revents & POLLIN) {
+			if (count > 2)
+				kdbus_name_release(conn_a, "foo.bar.baz");
+
+			ret = kdbus_msg_recv(conn_a, NULL, NULL);
+			ASSERT_RETURN(ret == 0);
+			ret = kdbus_msg_send(conn_a, NULL,
+					     0xc0000000 | cookie++,
+					     0, 0, 0, conn_b->id);
+			ASSERT_RETURN(ret == 0);
+		}
+
+		if (fds[1].revents & POLLIN) {
+			ret = kdbus_msg_recv(conn_b, NULL, NULL);
+			ASSERT_RETURN(ret == 0);
+			ret = kdbus_msg_send(conn_b, NULL,
+					     0xc0000000 | cookie++,
+					     0, 0, 0, conn_a->id);
+			ASSERT_RETURN(ret == 0);
+		}
+
+		ret = kdbus_list(conn_b, KDBUS_LIST_UNIQUE |
+					 KDBUS_LIST_NAMES  |
+					 KDBUS_LIST_QUEUED |
+					 KDBUS_LIST_ACTIVATORS);
+		ASSERT_RETURN(ret == 0);
+
+		if (count > 10)
+			break;
+	}
+
+	kdbus_printf("-- closing bus connections\n");
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-connection.c linux-4.2-pck/tools/testing/selftests/kdbus/test-connection.c
--- linux-4.2/tools/testing/selftests/kdbus/test-connection.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-connection.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,597 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+int kdbus_test_hello(struct kdbus_test_env *env)
+{
+	struct kdbus_cmd_free cmd_free = {};
+	struct kdbus_cmd_hello hello;
+	int fd, ret;
+
+	memset(&hello, 0, sizeof(hello));
+
+	fd = open(env->buspath, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(fd >= 0);
+
+	hello.flags = KDBUS_HELLO_ACCEPT_FD;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	hello.attach_flags_recv = _KDBUS_ATTACH_ALL;
+	hello.size = sizeof(struct kdbus_cmd_hello);
+	hello.pool_size = POOL_SIZE;
+
+	/* an unaligned hello must result in -EFAULT */
+	ret = kdbus_cmd_hello(fd, (struct kdbus_cmd_hello *) ((char *) &hello + 1));
+	ASSERT_RETURN(ret == -EFAULT);
+
+	/* an invalid, too-small size must return -EINVAL */
+	hello.size = 1;
+	hello.flags = KDBUS_HELLO_ACCEPT_FD;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	hello.size = sizeof(struct kdbus_cmd_hello);
+
+	/* check faulty flags */
+	hello.flags = 1ULL << 32;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* check for faulty pool sizes */
+	hello.pool_size = 0;
+	hello.flags = KDBUS_HELLO_ACCEPT_FD;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	hello.pool_size = 4097;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	hello.pool_size = POOL_SIZE;
+
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	hello.offset = (__u64)-1;
+
+	/* success test */
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == 0);
+
+	/* The kernel should have returned some items */
+	ASSERT_RETURN(hello.offset != (__u64)-1);
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = hello.offset;
+	ret = kdbus_cmd_free(fd, &cmd_free);
+	ASSERT_RETURN(ret >= 0);
+
+	close(fd);
+
+	fd = open(env->buspath, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(fd >= 0);
+
+	/* no ACTIVATOR flag without a name */
+	hello.flags = KDBUS_HELLO_ACTIVATOR;
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	close(fd);
+
+	return TEST_OK;
+}
+
+int kdbus_test_byebye(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	struct kdbus_cmd_recv cmd_recv = { .size = sizeof(cmd_recv) };
+	struct kdbus_cmd cmd_byebye = { .size = sizeof(cmd_byebye) };
+	int ret;
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	ret = kdbus_add_match_empty(conn);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_empty(env->conn);
+	ASSERT_RETURN(ret == 0);
+
+	/* send over 1st connection */
+	ret = kdbus_msg_send(env->conn, NULL, 0, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	/* say byebye on the 2nd, which must fail */
+	ret = kdbus_cmd_byebye(conn->fd, &cmd_byebye);
+	ASSERT_RETURN(ret == -EBUSY);
+
+	/* receive the message */
+	ret = kdbus_cmd_recv(conn->fd, &cmd_recv);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_free(conn, cmd_recv.msg.offset);
+	ASSERT_RETURN(ret == 0);
+
+	/* and try again */
+	ret = kdbus_cmd_byebye(conn->fd, &cmd_byebye);
+	ASSERT_RETURN(ret == 0);
+
+	/* a 2nd try should result in -ECONNRESET */
+	ret = kdbus_cmd_byebye(conn->fd, &cmd_byebye);
+	ASSERT_RETURN(ret == -ECONNRESET);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+/* Get only the first item */
+static struct kdbus_item *kdbus_get_item(struct kdbus_info *info,
+					 uint64_t type)
+{
+	struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, info, items)
+		if (item->type == type)
+			return item;
+
+	return NULL;
+}
+
+static unsigned int kdbus_count_item(struct kdbus_info *info,
+				     uint64_t type)
+{
+	unsigned int i = 0;
+	const struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, info, items)
+		if (item->type == type)
+			i++;
+
+	return i;
+}
+
+static int kdbus_fuzz_conn_info(struct kdbus_test_env *env, int capable)
+{
+	int ret;
+	unsigned int cnt = 0;
+	uint64_t offset = 0;
+	struct kdbus_info *info;
+	struct kdbus_conn *conn;
+	struct kdbus_conn *privileged;
+	const struct kdbus_item *item;
+	uint64_t valid_flags = KDBUS_ATTACH_NAMES |
+			       KDBUS_ATTACH_CREDS |
+			       KDBUS_ATTACH_PIDS |
+			       KDBUS_ATTACH_CONN_DESCRIPTION;
+
+	uint64_t invalid_flags = KDBUS_ATTACH_NAMES	|
+				 KDBUS_ATTACH_CREDS	|
+				 KDBUS_ATTACH_PIDS	|
+				 KDBUS_ATTACH_CAPS	|
+				 KDBUS_ATTACH_CGROUP	|
+				 KDBUS_ATTACH_CONN_DESCRIPTION;
+
+	struct kdbus_creds cached_creds;
+	uid_t ruid, euid, suid;
+	gid_t rgid, egid, sgid;
+
+	getresuid(&ruid, &euid, &suid);
+	getresgid(&rgid, &egid, &sgid);
+
+	cached_creds.uid = ruid;
+	cached_creds.euid = euid;
+	cached_creds.suid = suid;
+	cached_creds.fsuid = ruid;
+
+	cached_creds.gid = rgid;
+	cached_creds.egid = egid;
+	cached_creds.sgid = sgid;
+	cached_creds.fsgid = rgid;
+
+	struct kdbus_pids cached_pids = {
+		.pid	= getpid(),
+		.tid	= syscall(SYS_gettid),
+		.ppid	= getppid(),
+	};
+
+	ret = kdbus_conn_info(env->conn, env->conn->id, NULL,
+			      valid_flags, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(env->conn->buf + offset);
+	ASSERT_RETURN(info->id == env->conn->id);
+
+	/* We do not have any well-known name */
+	item = kdbus_get_item(info, KDBUS_ITEM_NAME);
+	ASSERT_RETURN(item == NULL);
+
+	item = kdbus_get_item(info, KDBUS_ITEM_CONN_DESCRIPTION);
+	if (valid_flags & KDBUS_ATTACH_CONN_DESCRIPTION) {
+		ASSERT_RETURN(item);
+	} else {
+		ASSERT_RETURN(item == NULL);
+	}
+
+	kdbus_free(env->conn, offset);
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	privileged = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(privileged);
+
+	ret = kdbus_conn_info(conn, conn->id, NULL, valid_flags, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(conn->buf + offset);
+	ASSERT_RETURN(info->id == conn->id);
+
+	/* We do not have any well-known name */
+	item = kdbus_get_item(info, KDBUS_ITEM_NAME);
+	ASSERT_RETURN(item == NULL);
+
+	cnt = kdbus_count_item(info, KDBUS_ITEM_CREDS);
+	if (valid_flags & KDBUS_ATTACH_CREDS) {
+		ASSERT_RETURN(cnt == 1);
+
+		item = kdbus_get_item(info, KDBUS_ITEM_CREDS);
+		ASSERT_RETURN(item);
+
+		/* Compare received items with cached creds */
+		ASSERT_RETURN(memcmp(&item->creds, &cached_creds,
+				      sizeof(struct kdbus_creds)) == 0);
+	} else {
+		ASSERT_RETURN(cnt == 0);
+	}
+
+	item = kdbus_get_item(info, KDBUS_ITEM_PIDS);
+	if (valid_flags & KDBUS_ATTACH_PIDS) {
+		ASSERT_RETURN(item);
+
+		/* Compare item->pids with cached PIDs */
+		ASSERT_RETURN(item->pids.pid == cached_pids.pid &&
+			      item->pids.tid == cached_pids.tid &&
+			      item->pids.ppid == cached_pids.ppid);
+	} else {
+		ASSERT_RETURN(item == NULL);
+	}
+
+	/* We did not request KDBUS_ITEM_CAPS */
+	item = kdbus_get_item(info, KDBUS_ITEM_CAPS);
+	ASSERT_RETURN(item == NULL);
+
+	kdbus_free(conn, offset);
+
+	ret = kdbus_name_acquire(conn, "com.example.a", NULL);
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_conn_info(conn, conn->id, NULL, valid_flags, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(conn->buf + offset);
+	ASSERT_RETURN(info->id == conn->id);
+
+	item = kdbus_get_item(info, KDBUS_ITEM_OWNED_NAME);
+	if (valid_flags & KDBUS_ATTACH_NAMES) {
+		ASSERT_RETURN(item && !strcmp(item->name.name, "com.example.a"));
+	} else {
+		ASSERT_RETURN(item == NULL);
+	}
+
+	kdbus_free(conn, offset);
+
+	ret = kdbus_conn_info(conn, 0, "com.example.a", valid_flags, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(conn->buf + offset);
+	ASSERT_RETURN(info->id == conn->id);
+
+	kdbus_free(conn, offset);
+
+	/* does not have the necessary caps to drop to unprivileged */
+	if (!capable)
+		goto continue_test;
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_GID, ({
+		ret = kdbus_conn_info(conn, conn->id, NULL,
+				      valid_flags, &offset);
+		ASSERT_EXIT(ret == 0);
+
+		info = (struct kdbus_info *)(conn->buf + offset);
+		ASSERT_EXIT(info->id == conn->id);
+
+		if (valid_flags & KDBUS_ATTACH_NAMES) {
+			item = kdbus_get_item(info, KDBUS_ITEM_OWNED_NAME);
+			ASSERT_EXIT(item &&
+				    strcmp(item->name.name,
+				           "com.example.a") == 0);
+		}
+
+		if (valid_flags & KDBUS_ATTACH_CREDS) {
+			item = kdbus_get_item(info, KDBUS_ITEM_CREDS);
+			ASSERT_EXIT(item);
+
+			/* Compare received items with cached creds */
+			ASSERT_EXIT(memcmp(&item->creds, &cached_creds,
+				    sizeof(struct kdbus_creds)) == 0);
+		}
+
+		if (valid_flags & KDBUS_ATTACH_PIDS) {
+			item = kdbus_get_item(info, KDBUS_ITEM_PIDS);
+			ASSERT_EXIT(item);
+
+			/*
+			 * Compare item->pids with the cached pids of
+			 * the privileged connection.
+			 *
+			 * cmd_info always returns the cached pids.
+			 */
+			ASSERT_EXIT(item->pids.pid == cached_pids.pid &&
+				    item->pids.tid == cached_pids.tid);
+		}
+
+		kdbus_free(conn, offset);
+
+		/*
+		 * Use invalid_flags and make sure that userspace
+		 * cannot play games with us.
+		 */
+		ret = kdbus_conn_info(conn, conn->id, NULL,
+				      invalid_flags, &offset);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * Make sure that we return only one creds item and
+		 * it points to the cached creds.
+		 */
+		cnt = kdbus_count_item(info, KDBUS_ITEM_CREDS);
+		if (invalid_flags & KDBUS_ATTACH_CREDS) {
+			ASSERT_EXIT(cnt == 1);
+
+			item = kdbus_get_item(info, KDBUS_ITEM_CREDS);
+			ASSERT_EXIT(item);
+
+			/* Compare received items with cached creds */
+			ASSERT_EXIT(memcmp(&item->creds, &cached_creds,
+				    sizeof(struct kdbus_creds)) == 0);
+		} else {
+			ASSERT_EXIT(cnt == 0);
+		}
+
+		if (invalid_flags & KDBUS_ATTACH_PIDS) {
+			cnt = kdbus_count_item(info, KDBUS_ITEM_PIDS);
+			ASSERT_EXIT(cnt == 1);
+
+			item = kdbus_get_item(info, KDBUS_ITEM_PIDS);
+			ASSERT_EXIT(item);
+
+			/* Compare item->pids with cached pids */
+			ASSERT_EXIT(item->pids.pid == cached_pids.pid &&
+				    item->pids.tid == cached_pids.tid);
+		}
+
+		cnt = kdbus_count_item(info, KDBUS_ITEM_CGROUP);
+		if (invalid_flags & KDBUS_ATTACH_CGROUP) {
+			ASSERT_EXIT(cnt == 1);
+		} else {
+			ASSERT_EXIT(cnt == 0);
+		}
+
+		cnt = kdbus_count_item(info, KDBUS_ITEM_CAPS);
+		if (invalid_flags & KDBUS_ATTACH_CAPS) {
+			ASSERT_EXIT(cnt == 1);
+		} else {
+			ASSERT_EXIT(cnt == 0);
+		}
+
+		kdbus_free(conn, offset);
+	}),
+	({ 0; }));
+	ASSERT_RETURN(ret == 0);
+
+continue_test:
+
+	/* A second name */
+	ret = kdbus_name_acquire(conn, "com.example.b", NULL);
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_conn_info(conn, conn->id, NULL, valid_flags, &offset);
+	ASSERT_RETURN(ret == 0);
+
+	info = (struct kdbus_info *)(conn->buf + offset);
+	ASSERT_RETURN(info->id == conn->id);
+
+	cnt = kdbus_count_item(info, KDBUS_ITEM_OWNED_NAME);
+	if (valid_flags & KDBUS_ATTACH_NAMES) {
+		ASSERT_RETURN(cnt == 2);
+	} else {
+		ASSERT_RETURN(cnt == 0);
+	}
+
+	kdbus_free(conn, offset);
+
+	ASSERT_RETURN(ret == 0);
+
+	return 0;
+}
+
+int kdbus_test_conn_info(struct kdbus_test_env *env)
+{
+	int ret;
+	int have_caps;
+	struct {
+		struct kdbus_cmd_info cmd_info;
+
+		struct {
+			uint64_t size;
+			uint64_t type;
+			char str[64];
+		} name;
+	} buf;
+
+	buf.cmd_info.size = sizeof(struct kdbus_cmd_info);
+	buf.cmd_info.flags = 0;
+	buf.cmd_info.attach_flags = 0;
+	buf.cmd_info.id = env->conn->id;
+
+	ret = kdbus_conn_info(env->conn, env->conn->id, NULL, 0, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* try to pass a name that is longer than the buffer's size */
+	buf.name.size = KDBUS_ITEM_HEADER_SIZE + 1;
+	buf.name.type = KDBUS_ITEM_NAME;
+	strcpy(buf.name.str, "foo.bar.bla");
+
+	buf.cmd_info.id = 0;
+	buf.cmd_info.size = sizeof(buf.cmd_info) + buf.name.size;
+	ret = kdbus_cmd_conn_info(env->conn->fd, (struct kdbus_cmd_info *) &buf);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* Pass a non existent name */
+	ret = kdbus_conn_info(env->conn, 0, "non.existent.name", 0, NULL);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	if (!all_uids_gids_are_mapped())
+		return TEST_SKIP;
+
+	/* Check for capabilities only here, so the previous tests still run */
+	have_caps = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(have_caps >= 0);
+
+	ret = kdbus_fuzz_conn_info(env, have_caps);
+	ASSERT_RETURN(ret == 0);
+
+	/* Now if we have skipped some tests then let the user know */
+	if (!have_caps)
+		return TEST_SKIP;
+
+	return TEST_OK;
+}
+
+int kdbus_test_conn_update(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	int found = 0;
+	int ret;
+
+	/*
+	 * kdbus_hello() sets all attach flags. Receive a message by this
+	 * connection, and make sure a timestamp item (just to pick one) is
+	 * present.
+	 */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	ret = kdbus_msg_send(env->conn, NULL, 0x12345678, 0, 0, 0, conn->id);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	found = kdbus_item_in_message(msg, KDBUS_ITEM_TIMESTAMP);
+	ASSERT_RETURN(found == 1);
+
+	kdbus_msg_free(msg);
+
+	/*
+	 * Now, modify the attach flags and repeat the action. The item must
+	 * now be missing.
+	 */
+	found = 0;
+
+	ret = kdbus_conn_update_attach_flags(conn,
+					     _KDBUS_ATTACH_ALL,
+					     _KDBUS_ATTACH_ALL &
+					     ~KDBUS_ATTACH_TIMESTAMP);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_send(env->conn, NULL, 0x12345678, 0, 0, 0, conn->id);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	found = kdbus_item_in_message(msg, KDBUS_ITEM_TIMESTAMP);
+	ASSERT_RETURN(found == 0);
+
+	/* Provide a bogus attach_flags value */
+	ret = kdbus_conn_update_attach_flags(conn,
+					     _KDBUS_ATTACH_ALL + 1,
+					     _KDBUS_ATTACH_ALL);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	kdbus_msg_free(msg);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+int kdbus_test_writable_pool(struct kdbus_test_env *env)
+{
+	struct kdbus_cmd_free cmd_free = {};
+	struct kdbus_cmd_hello hello;
+	int fd, ret;
+	void *map;
+
+	fd = open(env->buspath, O_RDWR | O_CLOEXEC);
+	ASSERT_RETURN(fd >= 0);
+
+	memset(&hello, 0, sizeof(hello));
+	hello.flags = KDBUS_HELLO_ACCEPT_FD;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+	hello.attach_flags_recv = _KDBUS_ATTACH_ALL;
+	hello.size = sizeof(struct kdbus_cmd_hello);
+	hello.pool_size = POOL_SIZE;
+	hello.offset = (__u64)-1;
+
+	/* success test */
+	ret = kdbus_cmd_hello(fd, &hello);
+	ASSERT_RETURN(ret == 0);
+
+	/* The kernel should have returned some items */
+	ASSERT_RETURN(hello.offset != (__u64)-1);
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = hello.offset;
+	ret = kdbus_cmd_free(fd, &cmd_free);
+	ASSERT_RETURN(ret >= 0);
+
+	/* pools cannot be mapped writable */
+	map = mmap(NULL, POOL_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	ASSERT_RETURN(map == MAP_FAILED);
+
+	/* pools can always be mapped readable */
+	map = mmap(NULL, POOL_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	ASSERT_RETURN(map != MAP_FAILED);
+
+	/* make sure we cannot change protection masks to writable */
+	ret = mprotect(map, POOL_SIZE, PROT_READ | PROT_WRITE);
+	ASSERT_RETURN(ret < 0);
+
+	munmap(map, POOL_SIZE);
+	close(fd);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-daemon.c linux-4.2-pck/tools/testing/selftests/kdbus/test-daemon.c
--- linux-4.2/tools/testing/selftests/kdbus/test-daemon.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-daemon.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,65 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+int kdbus_test_daemon(struct kdbus_test_env *env)
+{
+	struct pollfd fds[2];
+	int count;
+	int ret;
+
+	/* This test doesn't make any sense in non-interactive mode */
+	if (!kdbus_util_verbose)
+		return TEST_OK;
+
+	printf("Created connection %llu on bus '%s'\n",
+		(unsigned long long) env->conn->id, env->buspath);
+
+	ret = kdbus_name_acquire(env->conn, "com.example.kdbus-test", NULL);
+	ASSERT_RETURN(ret == 0);
+	printf("  Acquired name: com.example.kdbus-test\n");
+
+	fds[0].fd = env->conn->fd;
+	fds[1].fd = STDIN_FILENO;
+
+	printf("Monitoring connections:\n");
+
+	for (count = 0;; count++) {
+		int i, nfds = sizeof(fds) / sizeof(fds[0]);
+
+		for (i = 0; i < nfds; i++) {
+			fds[i].events = POLLIN | POLLPRI | POLLHUP;
+			fds[i].revents = 0;
+		}
+
+		ret = poll(fds, nfds, -1);
+		if (ret <= 0)
+			break;
+
+		if (fds[0].revents & POLLIN) {
+			ret = kdbus_msg_recv(env->conn, NULL, NULL);
+			ASSERT_RETURN(ret == 0);
+		}
+
+		/* stdin */
+		if (fds[1].revents & POLLIN)
+			break;
+	}
+
+	printf("Closing bus connection\n");
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-endpoint.c linux-4.2-pck/tools/testing/selftests/kdbus/test-endpoint.c
--- linux-4.2/tools/testing/selftests/kdbus/test-endpoint.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-endpoint.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,352 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <libgen.h>
+#include <sys/capability.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+#define KDBUS_SYSNAME_MAX_LEN			63
+
+static int install_name_add_match(struct kdbus_conn *conn, const char *name)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_name_change chg;
+		} item;
+		char name[64];
+	} buf;
+	int ret;
+
+	/* install the match rule */
+	memset(&buf, 0, sizeof(buf));
+	buf.item.type = KDBUS_ITEM_NAME_ADD;
+	buf.item.chg.old_id.id = KDBUS_MATCH_ID_ANY;
+	buf.item.chg.new_id.id = KDBUS_MATCH_ID_ANY;
+	strncpy(buf.name, name, sizeof(buf.name) - 1);
+	buf.item.size = sizeof(buf.item) + strlen(buf.name) + 1;
+	buf.cmd.size = sizeof(buf.cmd) + buf.item.size;
+
+	ret = kdbus_cmd_match_add(conn->fd, &buf.cmd);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int create_endpoint(const char *buspath, uid_t uid, const char *name,
+			   uint64_t flags)
+{
+	struct {
+		struct kdbus_cmd cmd;
+
+		/* name item */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			/* max should be KDBUS_SYSNAME_MAX_LEN */
+			char str[128];
+		} name;
+	} ep_make;
+	int fd, ret;
+
+	fd = open(buspath, O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	memset(&ep_make, 0, sizeof(ep_make));
+
+	snprintf(ep_make.name.str,
+		 /* Use the KDBUS_SYSNAME_MAX_LEN or sizeof(str) */
+		 KDBUS_SYSNAME_MAX_LEN > strlen(name) ?
+		 KDBUS_SYSNAME_MAX_LEN : sizeof(ep_make.name.str),
+		 "%u-%s", uid, name);
+
+	ep_make.name.type = KDBUS_ITEM_MAKE_NAME;
+	ep_make.name.size = KDBUS_ITEM_HEADER_SIZE +
+			    strlen(ep_make.name.str) + 1;
+
+	ep_make.cmd.flags = flags;
+	ep_make.cmd.size = sizeof(ep_make.cmd) + ep_make.name.size;
+
+	ret = kdbus_cmd_endpoint_make(fd, &ep_make.cmd);
+	if (ret < 0) {
+		kdbus_printf("error creating endpoint: %d (%m)\n", ret);
+		return ret;
+	}
+
+	return fd;
+}
+
+static int unpriv_test_custom_ep(const char *buspath)
+{
+	int ret, ep_fd1, ep_fd2;
+	char *ep1, *ep2, *tmp1, *tmp2;
+
+	tmp1 = strdup(buspath);
+	tmp2 = strdup(buspath);
+	ASSERT_RETURN(tmp1 && tmp2);
+
+	ret = asprintf(&ep1, "%s/%u-%s", dirname(tmp1), getuid(), "apps1");
+	ASSERT_RETURN(ret >= 0);
+
+	ret = asprintf(&ep2, "%s/%u-%s", dirname(tmp2), getuid(), "apps2");
+	ASSERT_RETURN(ret >= 0);
+
+	free(tmp1);
+	free(tmp2);
+
+	/* endpoint only accessible to current uid */
+	ep_fd1 = create_endpoint(buspath, getuid(), "apps1", 0);
+	ASSERT_RETURN(ep_fd1 >= 0);
+
+	/* endpoint world accessible */
+	ep_fd2 = create_endpoint(buspath, getuid(), "apps2",
+				  KDBUS_MAKE_ACCESS_WORLD);
+	ASSERT_RETURN(ep_fd2 >= 0);
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_UID, ({
+		int ep_fd;
+		struct kdbus_conn *ep_conn;
+
+		/*
+		 * Make sure that we are not able to create custom
+		 * endpoints
+		 */
+		ep_fd = create_endpoint(buspath, getuid(),
+					"unpriv_custom_ep", 0);
+		ASSERT_EXIT(ep_fd == -EPERM);
+
+		/*
+		 * Endpoint "apps1" is only accessible to the user that
+		 * owns the endpoint. Access is denied by the VFS.
+		 */
+		ep_conn = kdbus_hello(ep1, 0, NULL, 0);
+		ASSERT_EXIT(!ep_conn && errno == EACCES);
+
+		/* Endpoint "apps2" world accessible */
+		ep_conn = kdbus_hello(ep2, 0, NULL, 0);
+		ASSERT_EXIT(ep_conn);
+
+		kdbus_conn_free(ep_conn);
+
+		_exit(EXIT_SUCCESS);
+	}),
+	({ 0; }));
+	ASSERT_RETURN(ret == 0);
+
+	close(ep_fd1);
+	close(ep_fd2);
+	free(ep1);
+	free(ep2);
+
+	return 0;
+}
+
+static int update_endpoint(int fd, const char *name)
+{
+	int len = strlen(name) + 1;
+	struct {
+		struct kdbus_cmd cmd;
+
+		/* name item */
+		struct {
+			uint64_t size;
+			uint64_t type;
+			char str[KDBUS_ALIGN8(len)];
+		} name;
+
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_policy_access access;
+		} access;
+	} ep_update;
+	int ret;
+
+	memset(&ep_update, 0, sizeof(ep_update));
+
+	ep_update.name.size = KDBUS_ITEM_HEADER_SIZE + len;
+	ep_update.name.type = KDBUS_ITEM_NAME;
+	strncpy(ep_update.name.str, name, sizeof(ep_update.name.str) - 1);
+
+	ep_update.access.size = sizeof(ep_update.access);
+	ep_update.access.type = KDBUS_ITEM_POLICY_ACCESS;
+	ep_update.access.access.type = KDBUS_POLICY_ACCESS_WORLD;
+	ep_update.access.access.access = KDBUS_POLICY_SEE;
+
+	ep_update.cmd.size = sizeof(ep_update);
+
+	ret = kdbus_cmd_endpoint_update(fd, &ep_update.cmd);
+	if (ret < 0) {
+		kdbus_printf("error updating endpoint: %d (%m)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int kdbus_test_custom_endpoint(struct kdbus_test_env *env)
+{
+	char *ep, *tmp;
+	int ret, ep_fd;
+	struct kdbus_msg *msg;
+	struct kdbus_conn *ep_conn;
+	struct kdbus_conn *reader;
+	const char *name = "foo.bar.baz";
+	const char *epname = "foo";
+	char fake_ep[KDBUS_SYSNAME_MAX_LEN + 1] = {'\0'};
+
+	memset(fake_ep, 'X', sizeof(fake_ep) - 1);
+
+	/* Try to create a custom endpoint with a long name */
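+	/*
+	 * fake_ep is KDBUS_SYSNAME_MAX_LEN 'X' characters long, and
+	 * create_endpoint() additionally prefixes it with "<uid>-", so
+	 * the resulting name must be rejected as too long.
+	 */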
+	ret = create_endpoint(env->buspath, getuid(), fake_ep, 0);
+	ASSERT_RETURN(ret == -ENAMETOOLONG);
+
+	/* Try to create a custom endpoint with a different uid */
+	ret = create_endpoint(env->buspath, getuid() + 1, "foobar", 0);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* create a custom endpoint, and open a connection on it */
+	ep_fd = create_endpoint(env->buspath, getuid(), "foo", 0);
+	ASSERT_RETURN(ep_fd >= 0);
+
+	tmp = strdup(env->buspath);
+	ASSERT_RETURN(tmp);
+
+	ret = asprintf(&ep, "%s/%u-%s", dirname(tmp), getuid(), epname);
+	free(tmp);
+	ASSERT_RETURN(ret >= 0);
+
+	/* Register a connection that listens to broadcasts */
+	reader = kdbus_hello(ep, 0, NULL, 0);
+	ASSERT_RETURN(reader);
+
+	/* Register to kernel signals */
+	ret = kdbus_add_match_id(reader, 0x1, KDBUS_ITEM_ID_ADD,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_id(reader, 0x2, KDBUS_ITEM_ID_REMOVE,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	ret = install_name_add_match(reader, name);
+	ASSERT_RETURN(ret == 0);
+
+	/* Monitor connections are not supported on custom endpoints */
+	ep_conn = kdbus_hello(ep, KDBUS_HELLO_MONITOR, NULL, 0);
+	ASSERT_RETURN(!ep_conn && errno == EOPNOTSUPP);
+
+	ep_conn = kdbus_hello(ep, 0, NULL, 0);
+	ASSERT_RETURN(ep_conn);
+
+	/* Check that the reader got the IdAdd notification */
+	ret = kdbus_msg_recv(reader, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_ID_ADD);
+	ASSERT_RETURN(msg->items[0].id_change.id == ep_conn->id);
+	kdbus_msg_free(msg);
+
+	/*
+	 * Add a name-add match on the endpoint connection, acquire the name
+	 * from the unfiltered connection, and make sure the filtered
+	 * connection does not get the notification on the name owner change.
+	 * Also, the endpoint connection must not be able to call conn_info,
+	 * either on the name or on the ID.
+	 */
+	ret = install_name_add_match(ep_conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(ep_conn, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	ret = kdbus_conn_info(ep_conn, 0, name, 0, NULL);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	ret = kdbus_conn_info(ep_conn, 0, "random.crappy.name", 0, NULL);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	ret = kdbus_conn_info(ep_conn, env->conn->id, NULL, 0, NULL);
+	ASSERT_RETURN(ret == -ENXIO);
+
+	ret = kdbus_conn_info(ep_conn, 0x0fffffffffffffffULL, NULL, 0, NULL);
+	ASSERT_RETURN(ret == -ENXIO);
+
+	/* Check that the reader did not receive the name notification */
+	ret = kdbus_msg_recv(reader, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/*
+	 * Release the name again, update the custom endpoint policy,
+	 * and try again. This time, the connection on the custom endpoint
+	 * should have gotten it.
+	 */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	/* Check that the reader did not receive the name notification */
+	ret = kdbus_msg_recv(reader, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	ret = update_endpoint(ep_fd, name);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(ep_conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_NAME_ADD);
+	ASSERT_RETURN(msg->items[0].name_change.old_id.id == 0);
+	ASSERT_RETURN(msg->items[0].name_change.new_id.id == env->conn->id);
+	ASSERT_RETURN(strcmp(msg->items[0].name_change.name, name) == 0);
+	kdbus_msg_free(msg);
+
+	ret = kdbus_msg_recv(reader, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(strcmp(msg->items[0].name_change.name, name) == 0);
+
+	kdbus_msg_free(msg);
+
+	ret = kdbus_conn_info(ep_conn, 0, name, 0, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_info(ep_conn, env->conn->id, NULL, 0, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* If we have the privileges, run the unprivileged endpoint test */
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * All uids/gids are mapped and we have the necessary caps
+	 */
+	if (ret && all_uids_gids_are_mapped()) {
+		ret = unpriv_test_custom_ep(env->buspath);
+		ASSERT_RETURN(ret == 0);
+	}
+
+	kdbus_conn_free(reader);
+	kdbus_conn_free(ep_conn);
+	close(ep_fd);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-fd.c linux-4.2-pck/tools/testing/selftests/kdbus/test-fd.c
--- linux-4.2/tools/testing/selftests/kdbus/test-fd.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-fd.c	2015-09-08 00:48:22.245029757 -0300
@@ -0,0 +1,789 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+#include "kdbus-api.h"
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+#define KDBUS_MSG_MAX_ITEMS     128
+#define KDBUS_USER_MAX_CONN	256
+
+/* maximum number of inflight fds in a target queue per user */
+#define KDBUS_CONN_MAX_FDS_PER_USER	16
+
+/* maximum number of memfd items per message */
+#define KDBUS_MSG_MAX_MEMFD_ITEMS       16
+
+static int make_msg_payload_dbus(uint64_t src_id, uint64_t dst_id,
+				 uint64_t msg_size,
+				 struct kdbus_msg **msg_dbus)
+{
+	struct kdbus_msg *msg;
+
+	msg = malloc(msg_size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, msg_size);
+	msg->size = msg_size;
+	msg->src_id = src_id;
+	msg->dst_id = dst_id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	*msg_dbus = msg;
+
+	return 0;
+}
+
+static void make_item_memfds(struct kdbus_item *item,
+			     int *memfds, size_t memfd_size)
+{
+	size_t i;
+
+	for (i = 0; i < memfd_size; i++) {
+		item->type = KDBUS_ITEM_PAYLOAD_MEMFD;
+		item->size = KDBUS_ITEM_HEADER_SIZE +
+			     sizeof(struct kdbus_memfd);
+		item->memfd.fd = memfds[i];
+		item->memfd.size = sizeof(uint64_t); /* const size */
+		item = KDBUS_ITEM_NEXT(item);
+	}
+}
+
+static void make_item_fds(struct kdbus_item *item,
+			  int *fd_array, size_t fd_size)
+{
+	size_t i;
+	item->type = KDBUS_ITEM_FDS;
+	item->size = KDBUS_ITEM_HEADER_SIZE + (sizeof(int) * fd_size);
+
+	for (i = 0; i < fd_size; i++)
+		item->fds[i] = fd_array[i];
+}
+
+static int memfd_write(const char *name, void *buf, size_t bufsize)
+{
+	ssize_t ret;
+	int memfd;
+
+	memfd = sys_memfd_create(name, 0);
+	ASSERT_RETURN_VAL(memfd >= 0, memfd);
+
+	ret = write(memfd, buf, bufsize);
+	ASSERT_RETURN_VAL(ret == (ssize_t)bufsize, -EAGAIN);
+
+	ret = sys_memfd_seal_set(memfd);
+	ASSERT_RETURN_VAL(ret == 0, -errno);
+
+	return memfd;
+}
+
+static int send_memfds(struct kdbus_conn *conn, uint64_t dst_id,
+		       int *memfds_array, size_t memfd_count)
+{
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_item *item;
+	struct kdbus_msg *msg;
+	uint64_t size;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	size += memfd_count * KDBUS_ITEM_SIZE(sizeof(struct kdbus_memfd));
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST)
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+
+	ret = make_msg_payload_dbus(conn->id, dst_id, size, &msg);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	item = msg->items;
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST) {
+		item->type = KDBUS_ITEM_BLOOM_FILTER;
+		item->size = KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+		item = KDBUS_ITEM_NEXT(item);
+
+		msg->flags |= KDBUS_MSG_SIGNAL;
+	}
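+	/*
+	 * Broadcasts are signals and must carry a bloom filter item; the
+	 * 64 bytes of filter data match the bloom size this test suite
+	 * presumably configures for its bus.
+	 */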
+
+	make_item_memfds(item, memfds_array, memfd_count);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	if (ret < 0) {
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+		return ret;
+	}
+
+	free(msg);
+	return 0;
+}
+
+static int send_fds(struct kdbus_conn *conn, uint64_t dst_id,
+		    int *fd_array, size_t fd_count)
+{
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_item *item;
+	struct kdbus_msg *msg;
+	uint64_t size;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(int) * fd_count);
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST)
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+
+	ret = make_msg_payload_dbus(conn->id, dst_id, size, &msg);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	item = msg->items;
+
+	if (dst_id == KDBUS_DST_ID_BROADCAST) {
+		item->type = KDBUS_ITEM_BLOOM_FILTER;
+		item->size = KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + 64;
+		item = KDBUS_ITEM_NEXT(item);
+
+		msg->flags |= KDBUS_MSG_SIGNAL;
+	}
+
+	make_item_fds(item, fd_array, fd_count);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	if (ret < 0) {
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+		return ret;
+	}
+
+	free(msg);
+	return ret;
+}
+
+static int send_fds_memfds(struct kdbus_conn *conn, uint64_t dst_id,
+			   int *fds_array, size_t fd_count,
+			   int *memfds_array, size_t memfd_count)
+{
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_item *item;
+	struct kdbus_msg *msg;
+	uint64_t size;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	size += memfd_count * KDBUS_ITEM_SIZE(sizeof(struct kdbus_memfd));
+	size += KDBUS_ITEM_SIZE(sizeof(int) * fd_count);
+
+	ret = make_msg_payload_dbus(conn->id, dst_id, size, &msg);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	item = msg->items;
+
+	make_item_fds(item, fds_array, fd_count);
+	item = KDBUS_ITEM_NEXT(item);
+	make_item_memfds(item, memfds_array, memfd_count);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	if (ret < 0) {
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+		return ret;
+	}
+
+	free(msg);
+	return ret;
+}
+
+/* Return the number of received fds */
+static unsigned int kdbus_item_get_nfds(struct kdbus_msg *msg)
+{
+	unsigned int fds = 0;
+	const struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, msg, items) {
+		switch (item->type) {
+		case KDBUS_ITEM_FDS: {
+			fds += (item->size - KDBUS_ITEM_HEADER_SIZE) /
+				sizeof(int);
+			break;
+		}
+
+		case KDBUS_ITEM_PAYLOAD_MEMFD:
+			fds++;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return fds;
+}
+
+static struct kdbus_msg *
+get_kdbus_msg_with_fd(struct kdbus_conn *conn_src,
+		      uint64_t dst_id, uint64_t cookie, int fd)
+{
+	int ret;
+	uint64_t size;
+	struct kdbus_item *item;
+	struct kdbus_msg *msg;
+
+	size = sizeof(struct kdbus_msg);
+	if (fd >= 0)
+		size += KDBUS_ITEM_SIZE(sizeof(int));
+
+	ret = make_msg_payload_dbus(conn_src->id, dst_id, size, &msg);
+	ASSERT_RETURN_VAL(ret == 0, NULL);
+
+	msg->cookie = cookie;
+
+	if (fd >= 0) {
+		item = msg->items;
+
+		make_item_fds(item, (int *)&fd, 1);
+	}
+
+	return msg;
+}
+
+static int kdbus_test_no_fds(struct kdbus_test_env *env,
+			     int *fds, int *memfd)
+{
+	pid_t pid;
+	int ret, status;
+	uint64_t cookie;
+	int connfd1, connfd2;
+	struct kdbus_msg *msg, *msg_sync_reply;
+	struct kdbus_cmd_hello hello;
+	struct kdbus_conn *conn_src, *conn_dst, *conn_dummy;
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_cmd_free cmd_free = {};
+
+	conn_src = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_src);
+
+	connfd1 = open(env->buspath, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(connfd1 >= 0);
+
+	connfd2 = open(env->buspath, O_RDWR|O_CLOEXEC);
+	ASSERT_RETURN(connfd2 >= 0);
+
+	/*
+	 * Create connections without KDBUS_HELLO_ACCEPT_FD
+	 * to test if send fd operations are blocked
+	 */
+	conn_dst = malloc(sizeof(*conn_dst));
+	ASSERT_RETURN(conn_dst);
+
+	conn_dummy = malloc(sizeof(*conn_dummy));
+	ASSERT_RETURN(conn_dummy);
+
+	memset(&hello, 0, sizeof(hello));
+	hello.size = sizeof(struct kdbus_cmd_hello);
+	hello.pool_size = POOL_SIZE;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+
+	ret = kdbus_cmd_hello(connfd1, &hello);
+	ASSERT_RETURN(ret == 0);
+
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = hello.offset;
+	ret = kdbus_cmd_free(connfd1, &cmd_free);
+	ASSERT_RETURN(ret >= 0);
+
+	conn_dst->fd = connfd1;
+	conn_dst->id = hello.id;
+
+	memset(&hello, 0, sizeof(hello));
+	hello.size = sizeof(struct kdbus_cmd_hello);
+	hello.pool_size = POOL_SIZE;
+	hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+
+	ret = kdbus_cmd_hello(connfd2, &hello);
+	ASSERT_RETURN(ret == 0);
+
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = hello.offset;
+	ret = kdbus_cmd_free(connfd2, &cmd_free);
+	ASSERT_RETURN(ret >= 0);
+
+	conn_dummy->fd = connfd2;
+	conn_dummy->id = hello.id;
+
+	conn_dst->buf = mmap(NULL, POOL_SIZE, PROT_READ,
+			     MAP_SHARED, connfd1, 0);
+	ASSERT_RETURN(conn_dst->buf != MAP_FAILED);
+
+	conn_dummy->buf = mmap(NULL, POOL_SIZE, PROT_READ,
+			       MAP_SHARED, connfd2, 0);
+	ASSERT_RETURN(conn_dummy->buf != MAP_FAILED);
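+	/*
+	 * The two connections above are set up by hand rather than through
+	 * kdbus_hello(), which presumably always passes
+	 * KDBUS_HELLO_ACCEPT_FD; here the flag must stay cleared.
+	 */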
+
+	/*
+	 * Send fds to a connection that does not accept fd passing
+	 */
+	ret = send_fds(conn_src, conn_dst->id, fds, 1);
+	ASSERT_RETURN(ret == -ECOMM);
+
+	/*
+	 * memfds are kdbus payloads and are accepted even without
+	 * KDBUS_HELLO_ACCEPT_FD
+	 */
+	ret = send_memfds(conn_src, conn_dst->id, memfd, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv_poll(conn_dst, 100, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	cookie = time(NULL);
+
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		struct timespec now;
+
+		/*
+		 * A sync send/reply to a connection that does not
+		 * accept fds should fail if it contains an fd
+		 */
+		msg_sync_reply = get_kdbus_msg_with_fd(conn_dst,
+						       conn_dummy->id,
+						       cookie, fds[0]);
+		ASSERT_EXIT(msg_sync_reply);
+
+		ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
+		ASSERT_EXIT(ret == 0);
+
+		msg_sync_reply->timeout_ns = now.tv_sec * 1000000000ULL +
+					     now.tv_nsec + 100000000ULL;
+		msg_sync_reply->flags = KDBUS_MSG_EXPECT_REPLY;
+
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.size = sizeof(cmd);
+		cmd.msg_address = (uintptr_t)msg_sync_reply;
+		cmd.flags = KDBUS_SEND_SYNC_REPLY;
+
+		ret = kdbus_cmd_send(conn_dst->fd, &cmd);
+		ASSERT_EXIT(ret == -ECOMM);
+
+		/*
+		 * Now send a normal message, but the sync reply
+		 * will fail since it contains an fd that the
+		 * original sender does not want.
+		 *
+		 * The original sender will fail with -ETIMEDOUT
+		 */
+		cookie++;
+		ret = kdbus_msg_send_sync(conn_dst, NULL, cookie,
+					  KDBUS_MSG_EXPECT_REPLY,
+					  5000000000ULL, 0, conn_src->id, -1);
+		ASSERT_EXIT(ret == -EREMOTEIO);
+
+		cookie++;
+		ret = kdbus_msg_recv_poll(conn_dst, 100, &msg, NULL);
+		ASSERT_EXIT(ret == 0);
+		ASSERT_EXIT(msg->cookie == cookie);
+
+		free(msg_sync_reply);
+		kdbus_msg_free(msg);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = kdbus_msg_recv_poll(conn_dummy, 100, NULL, NULL);
+	ASSERT_RETURN(ret == -ETIMEDOUT);
+
+	cookie++;
+	ret = kdbus_msg_recv_poll(conn_src, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	/*
+	 * Try to reply with a kdbus connection handle; this should
+	 * fail with -EOPNOTSUPP
+	 */
+	msg_sync_reply = get_kdbus_msg_with_fd(conn_src,
+					       conn_dst->id,
+					       cookie, conn_dst->fd);
+	ASSERT_RETURN(msg_sync_reply);
+
+	msg_sync_reply->cookie_reply = cookie;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg_sync_reply;
+
+	ret = kdbus_cmd_send(conn_src->fd, &cmd);
+	ASSERT_RETURN(ret == -EOPNOTSUPP);
+
+	free(msg_sync_reply);
+
+	/*
+	 * Try to reply with a normal fd; this should fail even
+	 * if the response is a sync reply.
+	 *
+	 * From the sender's view we fail with -ECOMM
+	 */
+	msg_sync_reply = get_kdbus_msg_with_fd(conn_src,
+					       conn_dst->id,
+					       cookie, fds[0]);
+	ASSERT_RETURN(msg_sync_reply);
+
+	msg_sync_reply->cookie_reply = cookie;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg_sync_reply;
+
+	ret = kdbus_cmd_send(conn_src->fd, &cmd);
+	ASSERT_RETURN(ret == -ECOMM);
+
+	free(msg_sync_reply);
+
+	/*
+	 * Resend another normal message and check if the queue
+	 * is clear
+	 */
+	cookie++;
+	ret = kdbus_msg_send(conn_src, NULL, cookie, 0, 0, 0,
+			     conn_dst->id);
+	ASSERT_RETURN(ret == 0);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	kdbus_conn_free(conn_dummy);
+	kdbus_conn_free(conn_dst);
+	kdbus_conn_free(conn_src);
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+static int kdbus_send_multiple_fds(struct kdbus_conn *conn_src,
+				   struct kdbus_conn *conn_dst)
+{
+	int ret, i;
+	unsigned int nfds;
+	int fds[KDBUS_CONN_MAX_FDS_PER_USER + 1];
+	int memfds[KDBUS_MSG_MAX_ITEMS + 1];
+	struct kdbus_msg *msg;
+	uint64_t dummy_value;
+
+	dummy_value = time(NULL);
+
+	for (i = 0; i < KDBUS_CONN_MAX_FDS_PER_USER + 1; i++) {
+		fds[i] = open("/dev/null", O_RDWR|O_CLOEXEC);
+		ASSERT_RETURN_VAL(fds[i] >= 0, -errno);
+	}
+
+	/* Send KDBUS_CONN_MAX_FDS_PER_USER plus one more fd */
+	ret = send_fds(conn_src, conn_dst->id, fds,
+		       KDBUS_CONN_MAX_FDS_PER_USER + 1);
+	ASSERT_RETURN(ret == -EMFILE);
+
+	/* Retry with the correct KDBUS_CONN_MAX_FDS_PER_USER */
+	ret = send_fds(conn_src, conn_dst->id, fds,
+		       KDBUS_CONN_MAX_FDS_PER_USER);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* Check we got the right number of fds */
+	nfds = kdbus_item_get_nfds(msg);
+	ASSERT_RETURN(nfds == KDBUS_CONN_MAX_FDS_PER_USER);
+
+	kdbus_msg_free(msg);
+
+	for (i = 0; i < KDBUS_MSG_MAX_ITEMS + 1; i++, dummy_value++) {
+		memfds[i] = memfd_write("memfd-name",
+					&dummy_value,
+					sizeof(dummy_value));
+		ASSERT_RETURN_VAL(memfds[i] >= 0, memfds[i]);
+	}
+
+	/* Send KDBUS_MSG_MAX_ITEMS plus one more memfd */
+	ret = send_memfds(conn_src, conn_dst->id,
+			  memfds, KDBUS_MSG_MAX_ITEMS + 1);
+	ASSERT_RETURN(ret == -E2BIG);
+
+	ret = send_memfds(conn_src, conn_dst->id,
+			  memfds, KDBUS_MSG_MAX_MEMFD_ITEMS + 1);
+	ASSERT_RETURN(ret == -E2BIG);
+
+	/* Retry with the correct KDBUS_MSG_MAX_MEMFD_ITEMS */
+	ret = send_memfds(conn_src, conn_dst->id,
+			  memfds, KDBUS_MSG_MAX_MEMFD_ITEMS);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* Check we got the right number of fds */
+	nfds = kdbus_item_get_nfds(msg);
+	ASSERT_RETURN(nfds == KDBUS_MSG_MAX_MEMFD_ITEMS);
+
+	kdbus_msg_free(msg);
+
+
+	/*
+	 * Combine KDBUS_CONN_MAX_FDS_PER_USER + 1 fds and 10 memfds;
+	 * the fd count alone exceeds the per-user limit
+	 */
+	ret = send_fds_memfds(conn_src, conn_dst->id,
+			      fds, KDBUS_CONN_MAX_FDS_PER_USER + 1,
+			      memfds, 10);
+	ASSERT_RETURN(ret == -EMFILE);
+
+	ret = kdbus_msg_recv(conn_dst, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/*
+	 * Combine KDBUS_CONN_MAX_FDS_PER_USER fds and
+	 * (KDBUS_MSG_MAX_ITEMS - 1) + 1 memfds. All fds together take a
+	 * single item, while each memfd takes one item of its own, so
+	 * the item count exceeds KDBUS_MSG_MAX_ITEMS
+	 */
+	ret = send_fds_memfds(conn_src, conn_dst->id,
+			      fds, KDBUS_CONN_MAX_FDS_PER_USER,
+			      memfds, (KDBUS_MSG_MAX_ITEMS - 1) + 1);
+	ASSERT_RETURN(ret == -E2BIG);
+
+	ret = send_fds_memfds(conn_src, conn_dst->id,
+			      fds, KDBUS_CONN_MAX_FDS_PER_USER,
+			      memfds, KDBUS_MSG_MAX_MEMFD_ITEMS + 1);
+	ASSERT_RETURN(ret == -E2BIG);
+
+	ret = kdbus_msg_recv(conn_dst, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/*
+	 * Send KDBUS_CONN_MAX_FDS_PER_USER fds +
+	 * KDBUS_MSG_MAX_MEMFD_ITEMS memfds
+	 */
+	ret = send_fds_memfds(conn_src, conn_dst->id,
+			      fds, KDBUS_CONN_MAX_FDS_PER_USER,
+			      memfds, KDBUS_MSG_MAX_MEMFD_ITEMS);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* Check we got the right number of fds */
+	nfds = kdbus_item_get_nfds(msg);
+	ASSERT_RETURN(nfds == KDBUS_CONN_MAX_FDS_PER_USER +
+			      KDBUS_MSG_MAX_MEMFD_ITEMS);
+
+	kdbus_msg_free(msg);
+
+
+	/*
+	 * Re-send fds + memfds and close them, but do not receive
+	 * them, then try to queue more
+	 */
+	ret = send_fds_memfds(conn_src, conn_dst->id,
+			      fds, KDBUS_CONN_MAX_FDS_PER_USER,
+			      memfds, KDBUS_MSG_MAX_MEMFD_ITEMS);
+	ASSERT_RETURN(ret == 0);
+
+	/* close old references and get new ones */
+	for (i = 0; i < KDBUS_CONN_MAX_FDS_PER_USER + 1; i++) {
+		close(fds[i]);
+		fds[i] = open("/dev/null", O_RDWR|O_CLOEXEC);
+		ASSERT_RETURN_VAL(fds[i] >= 0, -errno);
+	}
+
+	/* should fail since we already have fds in the queue */
+	ret = send_fds(conn_src, conn_dst->id, fds,
+		       KDBUS_CONN_MAX_FDS_PER_USER);
+	ASSERT_RETURN(ret == -EMFILE);
+
+	/* This should succeed */
+	ret = send_memfds(conn_src, conn_dst->id,
+			  memfds, KDBUS_MSG_MAX_MEMFD_ITEMS);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	nfds = kdbus_item_get_nfds(msg);
+	ASSERT_RETURN(nfds == KDBUS_CONN_MAX_FDS_PER_USER +
+			      KDBUS_MSG_MAX_MEMFD_ITEMS);
+
+	kdbus_msg_free(msg);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	nfds = kdbus_item_get_nfds(msg);
+	ASSERT_RETURN(nfds == KDBUS_MSG_MAX_MEMFD_ITEMS);
+
+	kdbus_msg_free(msg);
+
+	ret = kdbus_msg_recv(conn_dst, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	for (i = 0; i < KDBUS_CONN_MAX_FDS_PER_USER + 1; i++)
+		close(fds[i]);
+
+	for (i = 0; i < KDBUS_MSG_MAX_ITEMS + 1; i++)
+		close(memfds[i]);
+
+	return 0;
+}
+
+int kdbus_test_fd_passing(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn_src, *conn_dst;
+	const char *str = "stackenblocken";
+	const struct kdbus_item *item;
+	struct kdbus_msg *msg;
+	unsigned int i;
+	uint64_t now;
+	int fds_conn[2];
+	int sock_pair[2];
+	int fds[2];
+	int memfd;
+	int ret;
+
+	now = (uint64_t) time(NULL);
+
+	/* create two connections */
+	conn_src = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_dst = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_src && conn_dst);
+
+	fds_conn[0] = conn_src->fd;
+	fds_conn[1] = conn_dst->fd;
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_pair);
+	ASSERT_RETURN(ret == 0);
+
+	/* Setup memfd */
+	memfd = memfd_write("memfd-name", &now, sizeof(now));
+	ASSERT_RETURN(memfd >= 0);
+
+	/* Setup pipes */
+	ret = pipe(fds);
+	ASSERT_RETURN(ret == 0);
+
+	i = write(fds[1], str, strlen(str));
+	ASSERT_RETURN(i == strlen(str));
+
+	/*
+	 * Try to pass the handle of a connection as message payload.
+	 * This must fail.
+	 */
+	 */
+	ret = send_fds(conn_src, conn_dst->id, fds_conn, 2);
+	ASSERT_RETURN(ret == -ENOTSUP);
+
+	ret = send_fds(conn_dst, conn_src->id, fds_conn, 2);
+	ASSERT_RETURN(ret == -ENOTSUP);
+
+	ret = send_fds(conn_src, conn_dst->id, sock_pair, 2);
+	ASSERT_RETURN(ret == -ENOTSUP);
+
+	/*
+	 * Send fds and memfds to a connection that does not accept fds
+	 */
+	ret = kdbus_test_no_fds(env, fds, (int *)&memfd);
+	ASSERT_RETURN(ret == 0);
+
+	/* Try to broadcast file descriptors. This must fail. */
+	ret = send_fds(conn_src, KDBUS_DST_ID_BROADCAST, fds, 1);
+	ASSERT_RETURN(ret == -ENOTUNIQ);
+
+	/* Try to broadcast memfd. This must succeed. */
+	ret = send_memfds(conn_src, KDBUS_DST_ID_BROADCAST, (int *)&memfd, 1);
+	ASSERT_RETURN(ret == 0);
+
+	/* This send/receive loop is open-coded using a goto */
+loop_send_fds:
+
+	/*
+	 * Send the read end of the pipe and close it.
+	 */
+	ret = send_fds(conn_src, conn_dst->id, fds, 1);
+	ASSERT_RETURN(ret == 0);
+	close(fds[0]);
+
+	ret = kdbus_msg_recv(conn_dst, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	KDBUS_ITEM_FOREACH(item, msg, items) {
+		if (item->type == KDBUS_ITEM_FDS) {
+			char tmp[14];
+			int nfds = (item->size - KDBUS_ITEM_HEADER_SIZE) /
+					sizeof(int);
+			ASSERT_RETURN(nfds == 1);
+
+			i = read(item->fds[0], tmp, sizeof(tmp));
+			if (i != 0) {
+				ASSERT_RETURN(i == sizeof(tmp));
+				ASSERT_RETURN(memcmp(tmp, str, sizeof(tmp)) == 0);
+
+				/* Write EOF */
+				close(fds[1]);
+
+				/*
+				 * Resend the read end of the pipe,
+				 * the receiver still holds a reference
+				 * to it...
+				 */
+				goto loop_send_fds;
+			}
+
+			/* Got EOF */
+
+			/*
+			 * Close the last reference to the read end
+			 * of the pipe, other references are
+			 * automatically closed just after send.
+			 */
+			close(item->fds[0]);
+		}
+	}
+
+	/*
+	 * Try to resend the read end of the pipe. Must fail with
+	 * -EBADF since both the sender and receiver closed their
+	 * references to it. We can assume the above since sender and
+	 * receiver are in the same process.
+	 */
+	ret = send_fds(conn_src, conn_dst->id, fds, 1);
+	ASSERT_RETURN(ret == -EBADF);
+
+	/* Then we clear out any received data... */
+	kdbus_msg_free(msg);
+
+	ret = kdbus_send_multiple_fds(conn_src, conn_dst);
+	ASSERT_RETURN(ret == 0);
+
+	close(sock_pair[0]);
+	close(sock_pair[1]);
+	close(memfd);
+
+	kdbus_conn_free(conn_src);
+	kdbus_conn_free(conn_dst);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-free.c linux-4.2-pck/tools/testing/selftests/kdbus/test-free.c
--- linux-4.2/tools/testing/selftests/kdbus/test-free.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-free.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+static int sample_ioctl_call(struct kdbus_test_env *env)
+{
+	int ret;
+	struct kdbus_cmd_list cmd_list = {
+		.flags = KDBUS_LIST_QUEUED,
+		.size = sizeof(cmd_list),
+	};
+
+	ret = kdbus_cmd_list(env->conn->fd, &cmd_list);
+	ASSERT_RETURN(ret == 0);
+
+	/* DON'T FREE THIS SLICE OF MEMORY! */
+
+	return TEST_OK;
+}
+
+int kdbus_test_free(struct kdbus_test_env *env)
+{
+	int ret;
+	struct kdbus_cmd_free cmd_free = {};
+
+	/* free an unallocated buffer */
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.flags = 0;
+	cmd_free.offset = 0;
+	ret = kdbus_cmd_free(env->conn->fd, &cmd_free);
+	ASSERT_RETURN(ret == -ENXIO);
+
+	/* free a buffer out of the pool's bounds */
+	cmd_free.size = sizeof(cmd_free);
+	cmd_free.offset = POOL_SIZE + 1;
+	ret = kdbus_cmd_free(env->conn->fd, &cmd_free);
+	ASSERT_RETURN(ret == -ENXIO);
+
+	/*
+	 * The user application is responsible for freeing the allocated
+	 * memory with the KDBUS_CMD_FREE ioctl, so let's test what happens
+	 * if we forget about it.
+	 */
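+	/*
+	 * Calling the ioctl twice checks that a slice leaked by the
+	 * first call neither corrupts the pool nor blocks a second one.
+	 */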
+
+	ret = sample_ioctl_call(env);
+	ASSERT_RETURN(ret == 0);
+
+	ret = sample_ioctl_call(env);
+	ASSERT_RETURN(ret == 0);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-match.c linux-4.2-pck/tools/testing/selftests/kdbus/test-match.c
--- linux-4.2/tools/testing/selftests/kdbus/test-match.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-match.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,441 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+int kdbus_test_match_id_add(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_id_change chg;
+		} item;
+	} buf;
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	int ret;
+
+	memset(&buf, 0, sizeof(buf));
+
+	buf.cmd.size = sizeof(buf);
+	buf.cmd.cookie = 0xdeafbeefdeaddead;
+	buf.item.size = sizeof(buf.item);
+	buf.item.type = KDBUS_ITEM_ID_ADD;
+	buf.item.chg.id = KDBUS_MATCH_ID_ANY;
+
+	/* match on id add */
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* create 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* 1st connection should have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_ID_ADD);
+	ASSERT_RETURN(msg->items[0].id_change.id == conn->id);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+int kdbus_test_match_id_remove(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_id_change chg;
+		} item;
+	} buf;
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	size_t id;
+	int ret;
+
+	/* create 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+	id = conn->id;
+
+	memset(&buf, 0, sizeof(buf));
+	buf.cmd.size = sizeof(buf);
+	buf.cmd.cookie = 0xdeafbeefdeaddead;
+	buf.item.size = sizeof(buf.item);
+	buf.item.type = KDBUS_ITEM_ID_REMOVE;
+	buf.item.chg.id = id;
+
+	/* register match on 2nd connection */
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* remove 2nd connection again */
+	kdbus_conn_free(conn);
+
+	/* 1st connection should have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_ID_REMOVE);
+	ASSERT_RETURN(msg->items[0].id_change.id == id);
+
+	return TEST_OK;
+}
+
+int kdbus_test_match_replace(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_id_change chg;
+		} item;
+	} buf;
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	size_t id;
+	int ret;
+
+	/* add a match to id_add */
+	ASSERT_RETURN(kdbus_test_match_id_add(env) == TEST_OK);
+
+	/* do a replace of the match from id_add to id_remove */
+	memset(&buf, 0, sizeof(buf));
+
+	buf.cmd.size = sizeof(buf);
+	buf.cmd.cookie = 0xdeafbeefdeaddead;
+	buf.cmd.flags = KDBUS_MATCH_REPLACE;
+	buf.item.size = sizeof(buf.item);
+	buf.item.type = KDBUS_ITEM_ID_REMOVE;
+	buf.item.chg.id = KDBUS_MATCH_ID_ANY;
+
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* create 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+	id = conn->id;
+
+	/* 1st connection should _not_ have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret != 0);
+
+	/* remove 2nd connection */
+	kdbus_conn_free(conn);
+
+	/* 1st connection should _now_ have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_ID_REMOVE);
+	ASSERT_RETURN(msg->items[0].id_change.id == id);
+
+	return TEST_OK;
+}
+
+int kdbus_test_match_name_add(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_name_change chg;
+		} item;
+		char name[64];
+	} buf;
+	struct kdbus_msg *msg;
+	char *name;
+	int ret;
+
+	name = "foo.bla.blaz";
+
+	/* install the match rule */
+	memset(&buf, 0, sizeof(buf));
+	buf.item.type = KDBUS_ITEM_NAME_ADD;
+	buf.item.chg.old_id.id = KDBUS_MATCH_ID_ANY;
+	buf.item.chg.new_id.id = KDBUS_MATCH_ID_ANY;
+	strncpy(buf.name, name, sizeof(buf.name) - 1);
+	buf.item.size = sizeof(buf.item) + strlen(buf.name) + 1;
+	buf.cmd.size = sizeof(buf.cmd) + buf.item.size;
+
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* acquire the name */
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* we should have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_NAME_ADD);
+	ASSERT_RETURN(msg->items[0].name_change.old_id.id == 0);
+	ASSERT_RETURN(msg->items[0].name_change.new_id.id == env->conn->id);
+	ASSERT_RETURN(strcmp(msg->items[0].name_change.name, name) == 0);
+
+	return TEST_OK;
+}
+
+int kdbus_test_match_name_remove(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_name_change chg;
+		} item;
+		char name[64];
+	} buf;
+	struct kdbus_msg *msg;
+	char *name;
+	int ret;
+
+	name = "foo.bla.blaz";
+
+	/* acquire the name */
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* install the match rule */
+	memset(&buf, 0, sizeof(buf));
+	buf.item.type = KDBUS_ITEM_NAME_REMOVE;
+	buf.item.chg.old_id.id = KDBUS_MATCH_ID_ANY;
+	buf.item.chg.new_id.id = KDBUS_MATCH_ID_ANY;
+	strncpy(buf.name, name, sizeof(buf.name) - 1);
+	buf.item.size = sizeof(buf.item) + strlen(buf.name) + 1;
+	buf.cmd.size = sizeof(buf.cmd) + buf.item.size;
+
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* release the name again */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	/* we should have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_NAME_REMOVE);
+	ASSERT_RETURN(msg->items[0].name_change.old_id.id == env->conn->id);
+	ASSERT_RETURN(msg->items[0].name_change.new_id.id == 0);
+	ASSERT_RETURN(strcmp(msg->items[0].name_change.name, name) == 0);
+
+	return TEST_OK;
+}
+
+int kdbus_test_match_name_change(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			struct kdbus_notify_name_change chg;
+		} item;
+		char name[64];
+	} buf;
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	uint64_t flags;
+	char *name = "foo.bla.baz";
+	int ret;
+
+	/* acquire the name */
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* install the match rule */
+	memset(&buf, 0, sizeof(buf));
+	buf.item.type = KDBUS_ITEM_NAME_CHANGE;
+	buf.item.chg.old_id.id = KDBUS_MATCH_ID_ANY;
+	buf.item.chg.new_id.id = KDBUS_MATCH_ID_ANY;
+	strncpy(buf.name, name, sizeof(buf.name) - 1);
+	buf.item.size = sizeof(buf.item) + strlen(buf.name) + 1;
+	buf.cmd.size = sizeof(buf.cmd) + buf.item.size;
+
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* allow the new connection to own the same name */
+	/* queue the 2nd connection as waiting owner */
+	flags = KDBUS_NAME_QUEUE;
+	ret = kdbus_name_acquire(conn, name, &flags);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(flags & KDBUS_NAME_IN_QUEUE);
+
+	/* release name from 1st connection */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	/* we should have received a notification */
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ASSERT_RETURN(msg->items[0].type == KDBUS_ITEM_NAME_CHANGE);
+	ASSERT_RETURN(msg->items[0].name_change.old_id.id == env->conn->id);
+	ASSERT_RETURN(msg->items[0].name_change.new_id.id == conn->id);
+	ASSERT_RETURN(strcmp(msg->items[0].name_change.name, name) == 0);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+static int send_bloom_filter(const struct kdbus_conn *conn,
+			     uint64_t cookie,
+			     const uint8_t *filter,
+			     size_t filter_size,
+			     uint64_t filter_generation)
+{
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_msg *msg;
+	struct kdbus_item *item;
+	uint64_t size;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) + filter_size;
+
+	msg = alloca(size);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = conn->id;
+	msg->dst_id = KDBUS_DST_ID_BROADCAST;
+	msg->flags = KDBUS_MSG_SIGNAL;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+	msg->cookie = cookie;
+
+	item = msg->items;
+	item->type = KDBUS_ITEM_BLOOM_FILTER;
+	item->size = KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_filter)) +
+				filter_size;
+
+	item->bloom_filter.generation = filter_generation;
+	memcpy(item->bloom_filter.data, filter, filter_size);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(conn->fd, &cmd);
+	if (ret < 0) {
+		kdbus_printf("error sending message: %d (%m)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int kdbus_test_match_bloom(struct kdbus_test_env *env)
+{
+	struct {
+		struct kdbus_cmd_match cmd;
+		struct {
+			uint64_t size;
+			uint64_t type;
+			uint8_t data_gen0[64];
+			uint8_t data_gen1[64];
+		} item;
+	} buf;
+	struct kdbus_conn *conn;
+	struct kdbus_msg *msg;
+	uint64_t cookie = 0xf000f00f;
+	uint8_t filter[64];
+	int ret;
+
+	/* install the match rule */
+	memset(&buf, 0, sizeof(buf));
+	buf.cmd.size = sizeof(buf);
+
+	buf.item.size = sizeof(buf.item);
+	buf.item.type = KDBUS_ITEM_BLOOM_MASK;
+	buf.item.data_gen0[0] = 0x55;
+	buf.item.data_gen0[63] = 0x80;
+
+	buf.item.data_gen1[1] = 0xaa;
+	buf.item.data_gen1[9] = 0x02;
+
+	ret = kdbus_cmd_match_add(env->conn->fd, &buf.cmd);
+	ASSERT_RETURN(ret == 0);
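+	/*
+	 * A broadcast passes this match only if every bit set in the
+	 * mask installed above is also set in the message's bloom
+	 * filter of the same generation, as the cases below show.
+	 */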
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* a message with a 0'ed out filter must not reach the other peer */
+	memset(filter, 0, sizeof(filter));
+	ret = send_bloom_filter(conn, ++cookie, filter, sizeof(filter), 0);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/* now set the filter to the connection's mask and expect success */
+	filter[0] = 0x55;
+	filter[63] = 0x80;
+	ret = send_bloom_filter(conn, ++cookie, filter, sizeof(filter), 0);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	/* broaden the filter and try again. this should also succeed. */
+	filter[0] = 0xff;
+	filter[8] = 0xff;
+	filter[63] = 0xff;
+	ret = send_bloom_filter(conn, ++cookie, filter, sizeof(filter), 0);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	/* the same filter must not match against bloom generation 1 */
+	ret = send_bloom_filter(conn, ++cookie, filter, sizeof(filter), 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/* set a different filter and try again */
+	filter[1] = 0xaa;
+	filter[9] = 0x02;
+	ret = send_bloom_filter(conn, ++cookie, filter, sizeof(filter), 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(env->conn, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-message.c linux-4.2-pck/tools/testing/selftests/kdbus/test-message.c
--- linux-4.2/tools/testing/selftests/kdbus/test-message.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-message.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,736 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <time.h>
+#include <stdbool.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+/* maximum number of queued messages from the same individual user */
+#define KDBUS_CONN_MAX_MSGS			256
+
+/* maximum number of queued requests waiting for a reply */
+#define KDBUS_CONN_MAX_REQUESTS_PENDING		128
+
+/* maximum message payload size */
+#define KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE		(2 * 1024UL * 1024UL)
+
+int kdbus_test_message_basic(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	struct kdbus_conn *sender;
+	struct kdbus_msg *msg;
+	uint64_t cookie = 0x1234abcd5678eeff;
+	uint64_t offset;
+	int ret;
+
+	sender = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(sender != NULL);
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	ret = kdbus_add_match_empty(conn);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_empty(sender);
+	ASSERT_RETURN(ret == 0);
+
+	/* send over 1st connection */
+	ret = kdbus_msg_send(sender, NULL, cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	/* Make sure that we do get our own broadcasts */
+	ret = kdbus_msg_recv(sender, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	/* ... and receive on the 2nd */
+	ret = kdbus_msg_recv_poll(conn, 100, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	/* Msgs that expect a reply must have timeout and cookie */
+	ret = kdbus_msg_send(sender, NULL, 0, KDBUS_MSG_EXPECT_REPLY,
+			     0, 0, conn->id);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* Faked replies with a valid reply cookie are rejected */
+	ret = kdbus_msg_send_reply(conn, time(NULL) ^ cookie, sender->id);
+	ASSERT_RETURN(ret == -EBADSLT);
+
+	ret = kdbus_free(conn, offset);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_conn_free(sender);
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+static int msg_recv_prio(struct kdbus_conn *conn,
+			 int64_t requested_prio,
+			 int64_t expected_prio)
+{
+	struct kdbus_cmd_recv recv = {
+		.size = sizeof(recv),
+		.flags = KDBUS_RECV_USE_PRIORITY,
+		.priority = requested_prio,
+	};
+	struct kdbus_msg *msg;
+	int ret;
+
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	if (ret < 0) {
+		kdbus_printf("error receiving message: %d (%m)\n", -errno);
+		return ret;
+	}
+
+	msg = (struct kdbus_msg *)(conn->buf + recv.msg.offset);
+	kdbus_msg_dump(conn, msg);
+
+	if (msg->priority != expected_prio) {
+		kdbus_printf("expected message prio %lld, got %lld\n",
+			     (long long) expected_prio,
+			     (long long) msg->priority);
+		return -EINVAL;
+	}
+
+	kdbus_msg_free(msg);
+	ret = kdbus_free(conn, recv.msg.offset);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int kdbus_test_message_prio(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *a, *b;
+	uint64_t cookie = 0;
+
+	a = kdbus_hello(env->buspath, 0, NULL, 0);
+	b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(a && b);
+
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,   25, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0, -600, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,   10, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,  -35, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0, -100, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,   20, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,  -15, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0, -800, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0, -150, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,   10, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0, -800, a->id) == 0);
+	ASSERT_RETURN(kdbus_msg_send(b, NULL, ++cookie, 0, 0,  -10, a->id) == 0);
+
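+	/*
+	 * With KDBUS_RECV_USE_PRIORITY only messages with a priority
+	 * value less than or equal to the requested one are dequeued,
+	 * lowest value first; -EAGAIN means nothing matched.
+	 */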
+	ASSERT_RETURN(msg_recv_prio(a, -200, -800) == 0);
+	ASSERT_RETURN(msg_recv_prio(a, -100, -800) == 0);
+	ASSERT_RETURN(msg_recv_prio(a, -400, -600) == 0);
+	ASSERT_RETURN(msg_recv_prio(a, -400, -600) == -EAGAIN);
+	ASSERT_RETURN(msg_recv_prio(a, 10, -150) == 0);
+	ASSERT_RETURN(msg_recv_prio(a, 10, -100) == 0);
+
+	kdbus_printf("--- get priority (all)\n");
+	ASSERT_RETURN(kdbus_msg_recv(a, NULL, NULL) == 0);
+
+	kdbus_conn_free(a);
+	kdbus_conn_free(b);
+
+	return TEST_OK;
+}
+
+static int kdbus_test_notify_kernel_quota(struct kdbus_test_env *env)
+{
+	int ret;
+	unsigned int i;
+	struct kdbus_conn *conn;
+	struct kdbus_conn *reader;
+	struct kdbus_msg *msg = NULL;
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+
+	reader = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(reader);
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	/* Register for ID signals */
+	ret = kdbus_add_match_id(reader, 0x1, KDBUS_ITEM_ID_ADD,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_id(reader, 0x2, KDBUS_ITEM_ID_REMOVE,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	/* Each iteration two notifications: add and remove ID */
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS / 2; i++) {
+		struct kdbus_conn *notifier;
+
+		notifier = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_RETURN(notifier);
+
+		kdbus_conn_free(notifier);
+	}
+
+	/*
+	 * Now the reader queue is full of kernel notifications,
+	 * but as a user we still have room to push our messages.
+	 */
+	ret = kdbus_msg_send(conn, NULL, 0xdeadbeef, 0, 0, 0, reader->id);
+	ASSERT_RETURN(ret == 0);
+
+	/* More ID kernel notifications that will be lost */
+	kdbus_conn_free(conn);
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	kdbus_conn_free(conn);
+
+	/*
+	 * We lost only 3 packets since only signal msgs are
+	 * accounted: the ID-remove notification of the first freed
+	 * connection plus the ID add/remove pair of the second one
+	 */
+	ret = kdbus_cmd_recv(reader->fd, &recv);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv.return_flags & KDBUS_RECV_RETURN_DROPPED_MSGS);
+	ASSERT_RETURN(recv.dropped_msgs == 3);
+
+	msg = (struct kdbus_msg *)(reader->buf + recv.msg.offset);
+	kdbus_msg_free(msg);
+
+	/* Read our queue */
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS - 1; i++) {
+		memset(&recv, 0, sizeof(recv));
+		recv.size = sizeof(recv);
+
+		ret = kdbus_cmd_recv(reader->fd, &recv);
+		ASSERT_RETURN(ret == 0);
+		ASSERT_RETURN(!(recv.return_flags &
+			        KDBUS_RECV_RETURN_DROPPED_MSGS));
+
+		msg = (struct kdbus_msg *)(reader->buf + recv.msg.offset);
+		kdbus_msg_free(msg);
+	}
+
+	ret = kdbus_msg_recv(reader, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(reader, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	kdbus_conn_free(reader);
+
+	return 0;
+}
+
+/* Return the number of messages successfully sent */
+static int kdbus_fill_conn_queue(struct kdbus_conn *conn_src,
+				 uint64_t dst_id,
+				 unsigned int max_msgs)
+{
+	unsigned int i;
+	uint64_t cookie = 0;
+	size_t size;
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_msg *msg;
+	int ret;
+
+	size = sizeof(struct kdbus_msg);
+	msg = malloc(size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = conn_src->id;
+	msg->dst_id = dst_id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	for (i = 0; i < max_msgs; i++) {
+		msg->cookie = cookie++;
+		ret = kdbus_cmd_send(conn_src->fd, &cmd);
+		if (ret < 0)
+			break;
+	}
+
+	free(msg);
+
+	return i;
+}
+
+static int kdbus_test_activator_quota(struct kdbus_test_env *env)
+{
+	int ret;
+	unsigned int i;
+	unsigned int activator_msgs_count = 0;
+	uint64_t cookie = time(NULL);
+	struct kdbus_conn *conn;
+	struct kdbus_conn *sender;
+	struct kdbus_conn *activator;
+	struct kdbus_msg *msg;
+	uint64_t flags;
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+	struct kdbus_policy_access access = {
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	activator = kdbus_hello_activator(env->buspath, "foo.test.activator",
+					  &access, 1);
+	ASSERT_RETURN(activator);
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	sender = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn && sender);
+
+	ret = kdbus_list(sender, KDBUS_LIST_NAMES |
+				 KDBUS_LIST_UNIQUE |
+				 KDBUS_LIST_ACTIVATORS |
+				 KDBUS_LIST_QUEUED);
+	ASSERT_RETURN(ret == 0);
+
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS; i++) {
+		ret = kdbus_msg_send(sender, "foo.test.activator",
+				     cookie++, 0, 0, 0,
+				     KDBUS_DST_ID_NAME);
+		if (ret < 0)
+			break;
+		activator_msgs_count++;
+	}
+
+	/* we must have sent at least one message */
+	ASSERT_RETURN_VAL(i > 0, -errno);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	/* Good, activator queue is full now */
+
+	/* ENXIO on direct send (activators can never be addressed by ID) */
+	ret = kdbus_msg_send(conn, NULL, cookie++, 0, 0, 0, activator->id);
+	ASSERT_RETURN(ret == -ENXIO);
+
+	/* can't queue more */
+	ret = kdbus_msg_send(conn, "foo.test.activator", cookie++,
+			     0, 0, 0, KDBUS_DST_ID_NAME);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	/* no match installed, so the broadcast will not inc dropped_msgs */
+	ret = kdbus_msg_send(sender, NULL, cookie++, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	/* Check activator queue */
+	ret = kdbus_cmd_recv(activator->fd, &recv);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv.dropped_msgs == 0);
+
+	activator_msgs_count--;
+
+	msg = (struct kdbus_msg *)(activator->buf + recv.msg.offset);
+	kdbus_msg_free(msg);
+
+
+	/* Stage 1) of the test: check the pool memory quota */
+
+	/* Consume the connection pool memory */
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS; i++) {
+		ret = kdbus_msg_send(sender, NULL,
+				     cookie++, 0, 0, 0, conn->id);
+		if (ret < 0)
+			break;
+	}
+
+	/* consume one message, so later at least one can be moved */
+	memset(&recv, 0, sizeof(recv));
+	recv.size = sizeof(recv);
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv.dropped_msgs == 0);
+	msg = (struct kdbus_msg *)(conn->buf + recv.msg.offset);
+	kdbus_msg_free(msg);
+
+	/* Try to acquire the name now */
+	flags = KDBUS_NAME_REPLACE_EXISTING;
+	ret = kdbus_name_acquire(conn, "foo.test.activator", &flags);
+	ASSERT_RETURN(ret == 0);
+
+	/* try to read messages and see if we have lost some */
+	memset(&recv, 0, sizeof(recv));
+	recv.size = sizeof(recv);
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv.dropped_msgs != 0);
+
+	/* number of dropped msgs < received ones (at least one was moved) */
+	ASSERT_RETURN(recv.dropped_msgs < activator_msgs_count);
+
+	/* Deduct the number of dropped msgs from the activator msgs */
+	activator_msgs_count -= recv.dropped_msgs;
+
+	msg = (struct kdbus_msg *)(activator->buf + recv.msg.offset);
+	kdbus_msg_free(msg);
+
+	/*
+	 * Release the name and hand it back to the activator; now
+	 * we should have 'activator_msgs_count' msgs again in
+	 * the activator queue
+	 */
+	ret = kdbus_name_release(conn, "foo.test.activator");
+	ASSERT_RETURN(ret == 0);
+
+	/* make sure that we got our previous activator msgs */
+	ret = kdbus_msg_recv(activator, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->src_id == sender->id);
+
+	activator_msgs_count--;
+
+	kdbus_msg_free(msg);
+
+
+	/* Stage 2) of the test: check the max message quota */
+
+	/* Empty conn queue */
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS; i++) {
+		ret = kdbus_msg_recv(conn, NULL, NULL);
+		if (ret == -EAGAIN)
+			break;
+	}
+
+	/* fill queue with max msgs quota */
+	ret = kdbus_fill_conn_queue(sender, conn->id, KDBUS_CONN_MAX_MSGS);
+	ASSERT_RETURN(ret == KDBUS_CONN_MAX_MSGS);
+
+	/* This one is lost but it is not accounted */
+	ret = kdbus_msg_send(sender, NULL,
+			     cookie++, 0, 0, 0, conn->id);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	/* Acquire the name again */
+	flags = KDBUS_NAME_REPLACE_EXISTING;
+	ret = kdbus_name_acquire(conn, "foo.test.activator", &flags);
+	ASSERT_RETURN(ret == 0);
+
+	memset(&recv, 0, sizeof(recv));
+	recv.size = sizeof(recv);
+
+	/*
+	 * Try to read messages and make sure that we have lost all
+	 * the activator messages due to quota checks. Our queue is
+	 * already full.
+	 */
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv.dropped_msgs == activator_msgs_count);
+
+	msg = (struct kdbus_msg *)(activator->buf + recv.msg.offset);
+	kdbus_msg_free(msg);
+
+	kdbus_conn_free(sender);
+	kdbus_conn_free(conn);
+	kdbus_conn_free(activator);
+
+	return 0;
+}
+
+static int kdbus_test_expected_reply_quota(struct kdbus_test_env *env)
+{
+	int ret;
+	unsigned int i, n;
+	unsigned int count;
+	uint64_t cookie = 0x1234abcd5678eeff;
+	struct kdbus_conn *conn;
+	struct kdbus_conn *connections[9];
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	for (i = 0; i < 9; i++) {
+		connections[i] = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_RETURN(connections[i]);
+	}
+
+	count = 0;
+	/* Send 16 messages to 8 different connections */
+	for (i = 0; i < 8; i++) {
+		for (n = 0; n < 16; n++) {
+			ret = kdbus_msg_send(conn, NULL, cookie++,
+					     KDBUS_MSG_EXPECT_REPLY,
+					     100000000ULL, 0,
+					     connections[i]->id);
+			if (ret < 0)
+				break;
+
+			count++;
+		}
+	}
+
+	/*
+	 * We should have queued exactly
+	 * KDBUS_CONN_MAX_REQUESTS_PENDING method calls
+	 */
+	ASSERT_RETURN(count == KDBUS_CONN_MAX_REQUESTS_PENDING);
+
+	/*
+	 * Now try to send a message to the last connection;
+	 * since we have reached KDBUS_CONN_MAX_REQUESTS_PENDING,
+	 * no further requests are allowed
+	 */
+	ret = kdbus_msg_send(conn, NULL, cookie++, KDBUS_MSG_EXPECT_REPLY,
+			     1000000000ULL, 0, connections[8]->id);
+	ASSERT_RETURN(ret == -EMLINK);
+
+	for (i = 0; i < 9; i++)
+		kdbus_conn_free(connections[i]);
+
+	kdbus_conn_free(conn);
+
+	return 0;
+}
+
+int kdbus_test_pool_quota(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *a, *b, *c;
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_item *item;
+	struct kdbus_msg *recv_msg;
+	struct kdbus_msg *msg;
+	uint64_t cookie = time(NULL);
+	uint64_t size;
+	unsigned int i;
+	char *payload;
+	int ret;
+
+	/* just a guard */
+	if (POOL_SIZE <= KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE ||
+	    POOL_SIZE % KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE != 0)
+		return 0;
+
+	payload = calloc(KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE, sizeof(char));
+	ASSERT_RETURN_VAL(payload, -ENOMEM);
+
+	a = kdbus_hello(env->buspath, 0, NULL, 0);
+	b = kdbus_hello(env->buspath, 0, NULL, 0);
+	c = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(a && b && c);
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	msg = malloc(size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = a->id;
+	msg->dst_id = c->id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	item = msg->items;
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = (uintptr_t)payload;
+	item->vec.size = KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE;
+	item = KDBUS_ITEM_NEXT(item);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	/*
+	 * Send 2097248 bytes per message. A user is only allowed to use
+	 * 33% of half of the free space of the pool; the already used
+	 * space is accounted as free space
+	 */
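+	/*
+	 * Each queued message occupies roughly the message header, one
+	 * KDBUS_ITEM_PAYLOAD_VEC item and the 2 MiB payload copy in the
+	 * receiver's pool; the loop below keeps the cumulative total
+	 * under POOL_SIZE / 2 / 3 so the next send must hit the quota.
+	 */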
+	size += KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE;
+	for (i = size; i < (POOL_SIZE / 2 / 3); i += size) {
+		msg->cookie = cookie++;
+
+		ret = kdbus_cmd_send(a->fd, &cmd);
+		ASSERT_RETURN_VAL(ret == 0, ret);
+	}
+
+	/* Try to get more than 33% */
+	msg->cookie = cookie++;
+	ret = kdbus_cmd_send(a->fd, &cmd);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	/* We still can pass small messages */
+	ret = kdbus_msg_send(b, NULL, cookie++, 0, 0, 0, c->id);
+	ASSERT_RETURN(ret == 0);
+
+	for (i = size; i < (POOL_SIZE / 2 / 3); i += size) {
+		ret = kdbus_msg_recv(c, &recv_msg, NULL);
+		ASSERT_RETURN(ret == 0);
+		ASSERT_RETURN(recv_msg->src_id == a->id);
+
+		kdbus_msg_free(recv_msg);
+	}
+
+	ret = kdbus_msg_recv(c, &recv_msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(recv_msg->src_id == b->id);
+
+	kdbus_msg_free(recv_msg);
+
+	ret = kdbus_msg_recv(c, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	free(msg);
+	free(payload);
+
+	kdbus_conn_free(c);
+	kdbus_conn_free(b);
+	kdbus_conn_free(a);
+
+	return 0;
+}
+
+int kdbus_test_message_quota(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *a, *b;
+	uint64_t cookie = 0;
+	int ret;
+	int i;
+
+	ret = kdbus_test_activator_quota(env);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_test_notify_kernel_quota(env);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_test_pool_quota(env);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_test_expected_reply_quota(env);
+	ASSERT_RETURN(ret == 0);
+
+	a = kdbus_hello(env->buspath, 0, NULL, 0);
+	b = kdbus_hello(env->buspath, 0, NULL, 0);
+
+	ret = kdbus_fill_conn_queue(b, a->id, KDBUS_CONN_MAX_MSGS);
+	ASSERT_RETURN(ret == KDBUS_CONN_MAX_MSGS);
+
+	ret = kdbus_msg_send(b, NULL, ++cookie, 0, 0, 0, a->id);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	for (i = 0; i < KDBUS_CONN_MAX_MSGS; ++i) {
+		ret = kdbus_msg_recv(a, NULL, NULL);
+		ASSERT_RETURN(ret == 0);
+	}
+
+	ret = kdbus_msg_recv(a, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	ret = kdbus_fill_conn_queue(b, a->id, KDBUS_CONN_MAX_MSGS + 1);
+	ASSERT_RETURN(ret == KDBUS_CONN_MAX_MSGS);
+
+	ret = kdbus_msg_send(b, NULL, ++cookie, 0, 0, 0, a->id);
+	ASSERT_RETURN(ret == -ENOBUFS);
+
+	kdbus_conn_free(a);
+	kdbus_conn_free(b);
+
+	return TEST_OK;
+}
+
+int kdbus_test_memory_access(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *a, *b;
+	struct kdbus_cmd_send cmd = {};
+	struct kdbus_item *item;
+	struct kdbus_msg *msg;
+	uint64_t test_addr = 0;
+	char line[256];
+	uint64_t size;
+	FILE *f;
+	int ret;
+
+	/*
+	 * Search in /proc/kallsyms for the address of a kernel symbol that
+	 * should always be there, regardless of the config. Use that address
+	 * in a PAYLOAD_VEC item and make sure it's inaccessible.
+	 */
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return TEST_SKIP;
+
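+	/*
+	 * Each /proc/kallsyms line has the form
+	 * "<address> <type> <symbol>"; skip the first two
+	 * space-separated fields and compare the symbol name.
+	 */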
+	while (fgets(line, sizeof(line), f)) {
+		char *s = line;
+
+		if (!strsep(&s, " "))
+			continue;
+
+		if (!strsep(&s, " "))
+			continue;
+
+		if (!strncmp(s, "mutex_lock", 10)) {
+			test_addr = strtoull(line, NULL, 16);
+			break;
+		}
+	}
+
+	fclose(f);
+
+	if (!test_addr)
+		return TEST_SKIP;
+
+	a = kdbus_hello(env->buspath, 0, NULL, 0);
+	b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(a && b);
+
+	size = sizeof(struct kdbus_msg);
+	size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+	msg = alloca(size);
+	ASSERT_RETURN_VAL(msg, -ENOMEM);
+
+	memset(msg, 0, size);
+	msg->size = size;
+	msg->src_id = a->id;
+	msg->dst_id = b->id;
+	msg->payload_type = KDBUS_PAYLOAD_DBUS;
+
+	item = msg->items;
+	item->type = KDBUS_ITEM_PAYLOAD_VEC;
+	item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+	item->vec.address = test_addr;
+	item->vec.size = sizeof(void*);
+	item = KDBUS_ITEM_NEXT(item);
+
+	cmd.size = sizeof(cmd);
+	cmd.msg_address = (uintptr_t)msg;
+
+	ret = kdbus_cmd_send(a->fd, &cmd);
+	ASSERT_RETURN(ret == -EFAULT);
+
+	kdbus_conn_free(b);
+	kdbus_conn_free(a);
+
+	return 0;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-metadata-ns.c linux-4.2-pck/tools/testing/selftests/kdbus/test-metadata-ns.c
--- linux-4.2/tools/testing/selftests/kdbus/test-metadata-ns.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-metadata-ns.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,500 @@
+/*
+ * Test metadata in new namespaces. Even though our tests can run
+ * in a namespaced setup, this test is necessary so we can inspect
+ * metadata on the same kdbusfs but across multiple namespaces.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/eventfd.h>
+#include <sys/syscall.h>
+#include <sys/capability.h>
+#include <linux/sched.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+static const struct kdbus_creds privileged_creds = {};
+
+static const struct kdbus_creds unmapped_creds = {
+	.uid	= UNPRIV_UID,
+	.euid	= UNPRIV_UID,
+	.suid	= UNPRIV_UID,
+	.fsuid	= UNPRIV_UID,
+	.gid	= UNPRIV_GID,
+	.egid	= UNPRIV_GID,
+	.sgid	= UNPRIV_GID,
+	.fsgid	= UNPRIV_GID,
+};
+
+static const struct kdbus_pids unmapped_pids = {};
+
+/* Get only the first item */
+static struct kdbus_item *kdbus_get_item(struct kdbus_msg *msg,
+					 uint64_t type)
+{
+	struct kdbus_item *item;
+
+	KDBUS_ITEM_FOREACH(item, msg, items)
+		if (item->type == type)
+			return item;
+
+	return NULL;
+}
+
+static int kdbus_match_kdbus_creds(struct kdbus_msg *msg,
+				   const struct kdbus_creds *expected_creds)
+{
+	struct kdbus_item *item;
+
+	item = kdbus_get_item(msg, KDBUS_ITEM_CREDS);
+	ASSERT_RETURN(item);
+
+	ASSERT_RETURN(memcmp(&item->creds, expected_creds,
+			     sizeof(struct kdbus_creds)) == 0);
+
+	return 0;
+}
+
+static int kdbus_match_kdbus_pids(struct kdbus_msg *msg,
+				  const struct kdbus_pids *expected_pids)
+{
+	struct kdbus_item *item;
+
+	item = kdbus_get_item(msg, KDBUS_ITEM_PIDS);
+	ASSERT_RETURN(item);
+
+	ASSERT_RETURN(memcmp(&item->pids, expected_pids,
+			     sizeof(struct kdbus_pids)) == 0);
+
+	return 0;
+}
+
+static int __kdbus_clone_userns_test(const char *bus,
+				     struct kdbus_conn *conn,
+				     uint64_t grandpa_pid,
+				     int signal_fd)
+{
+	int clone_ret;
+	int ret;
+	struct kdbus_msg *msg = NULL;
+	const struct kdbus_item *item;
+	uint64_t cookie = time(NULL) ^ 0xdeadbeef;
+	struct kdbus_conn *unpriv_conn = NULL;
+	struct kdbus_pids parent_pids = {
+		.pid = getppid(),
+		.tid = getppid(),
+		.ppid = grandpa_pid,
+	};
+
+	ret = drop_privileges(UNPRIV_UID, UNPRIV_GID);
+	ASSERT_EXIT(ret == 0);
+
+	unpriv_conn = kdbus_hello(bus, 0, NULL, 0);
+	ASSERT_EXIT(unpriv_conn);
+
+	ret = kdbus_add_match_empty(unpriv_conn);
+	ASSERT_EXIT(ret == 0);
+
+	/*
+	 * ping privileged connection from this new unprivileged
+	 * one
+	 */
+
+	ret = kdbus_msg_send(unpriv_conn, NULL, cookie, 0, 0,
+			     0, conn->id);
+	ASSERT_EXIT(ret == 0);
+
+	/*
+	 * Since we just dropped privileges, the dumpable flag was
+	 * cleared, which causes /proc/$clone_child/uid_map to be
+	 * owned by root; any userns uid mapping would then fail with
+	 * -EPERM since the mapping is done by uid 65534.
+	 *
+	 * To avoid this, set the dumpable flag again, which makes
+	 * procfs update the owner of the /proc/$clone_child/ inodes
+	 * to 65534.
+	 *
+	 * This way we can write to /proc/$clone_child/uid_map as uid
+	 * 65534 and map the uid 65534 to 0 inside the user namespace.
+	 */
+	ret = prctl(PR_SET_DUMPABLE, SUID_DUMP_USER);
+	ASSERT_EXIT(ret == 0);
+
+	/* Make child privileged in its new userns and run tests */
+
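+	/*
+	 * As used here, RUN_CLONE_CHILD() takes the clone flags plus
+	 * four statement blocks: clone setup, the cloned child's
+	 * body, parent-side setup (run once the child's pid is
+	 * known) and a parent body.
+	 */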
+	ret = RUN_CLONE_CHILD(&clone_ret,
+			      SIGCHLD | CLONE_NEWUSER | CLONE_NEWPID,
+	({ 0;  /* Clone setup, nothing */ }),
+	({
+		eventfd_t event_status = 0;
+		struct kdbus_conn *userns_conn;
+
+		/* ping connection from the new user namespace */
+		userns_conn = kdbus_hello(bus, 0, NULL, 0);
+		ASSERT_EXIT(userns_conn);
+
+		ret = kdbus_add_match_empty(userns_conn);
+		ASSERT_EXIT(ret == 0);
+
+		cookie++;
+		ret = kdbus_msg_send(userns_conn, NULL, cookie,
+				     0, 0, 0, conn->id);
+		ASSERT_EXIT(ret == 0);
+
+		/* Parent did send */
+		ret = eventfd_read(signal_fd, &event_status);
+		ASSERT_RETURN(ret >= 0 && event_status == 1);
+
+		/*
+		 * Receive from privileged connection
+		 */
+		kdbus_printf("Privileged → unprivileged/privileged "
+			     "in its userns "
+			     "(different userns and pidns):\n");
+		ret = kdbus_msg_recv_poll(userns_conn, 300, &msg, NULL);
+		ASSERT_EXIT(ret == 0);
+		ASSERT_EXIT(msg->dst_id == userns_conn->id);
+
+		item = kdbus_get_item(msg, KDBUS_ITEM_CAPS);
+		ASSERT_EXIT(item);
+
+		/* uid/gid not mapped, so we have unpriv cached creds */
+		ret = kdbus_match_kdbus_creds(msg, &unmapped_creds);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * Different pid namespaces. This is the child pidns,
+		 * so it should not see its parent's kdbus_pids.
+		 */
+		ret = kdbus_match_kdbus_pids(msg, &unmapped_pids);
+		ASSERT_EXIT(ret == 0);
+
+		kdbus_msg_free(msg);
+
+		/*
+		 * Receive broadcast from privileged connection
+		 */
+		kdbus_printf("Privileged → unprivileged/privileged "
+			     "in its userns "
+			     "(different userns and pidns):\n");
+		ret = kdbus_msg_recv_poll(userns_conn, 300, &msg, NULL);
+		ASSERT_EXIT(ret == 0);
+		ASSERT_EXIT(msg->dst_id == KDBUS_DST_ID_BROADCAST);
+
+		item = kdbus_get_item(msg, KDBUS_ITEM_CAPS);
+		ASSERT_EXIT(item);
+
+		/* uid/gid not mapped, so we have unpriv cached creds */
+		ret = kdbus_match_kdbus_creds(msg, &unmapped_creds);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * Different pid namespaces. This is the child pidns,
+		 * so it should not see its parent's kdbus_pids.
+		 */
+		ret = kdbus_match_kdbus_pids(msg, &unmapped_pids);
+		ASSERT_EXIT(ret == 0);
+
+		kdbus_msg_free(msg);
+
+		kdbus_conn_free(userns_conn);
+	}),
+	({
+		/* Parent setup map child uid/gid */
+		ret = userns_map_uid_gid(pid, "0 65534 1", "0 65534 1");
+		ASSERT_EXIT(ret == 0);
+	}),
+	({ 0; }));
+	/* Unprivileged was not able to create user namespace */
+	if (clone_ret == -EPERM) {
+		kdbus_printf("-- CLONE_NEWUSER TEST Failed for "
+			     "uid: %u\n -- Make sure that your kernel "
+			     "does not allow CLONE_NEWUSER for "
+			     "unprivileged users\n", UNPRIV_UID);
+		ret = 0;
+		goto out;
+	}
+
+	ASSERT_EXIT(ret == 0);
+
+	/*
+	 * Receive from privileged connection
+	 */
+	kdbus_printf("\nPrivileged → unprivileged (same namespaces):\n");
+	ret = kdbus_msg_recv_poll(unpriv_conn, 300, &msg, NULL);
+
+	ASSERT_EXIT(ret == 0);
+	ASSERT_EXIT(msg->dst_id == unpriv_conn->id);
+
+	/* will get the privileged creds */
+	ret = kdbus_match_kdbus_creds(msg, &privileged_creds);
+	ASSERT_EXIT(ret == 0);
+
+	/* Same pidns so will get the kdbus_pids */
+	ret = kdbus_match_kdbus_pids(msg, &parent_pids);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_msg_free(msg);
+
+
+	/*
+	 * Receive broadcast from privileged connection
+	 */
+	kdbus_printf("\nPrivileged → unprivileged (same namespaces):\n");
+	ret = kdbus_msg_recv_poll(unpriv_conn, 300, &msg, NULL);
+
+	ASSERT_EXIT(ret == 0);
+	ASSERT_EXIT(msg->dst_id == KDBUS_DST_ID_BROADCAST);
+
+	/* will get the privileged creds */
+	ret = kdbus_match_kdbus_creds(msg, &privileged_creds);
+	ASSERT_EXIT(ret == 0);
+
+	ret = kdbus_match_kdbus_pids(msg, &parent_pids);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_msg_free(msg);
+
+out:
+	kdbus_conn_free(unpriv_conn);
+
+	return ret;
+}
+
+static int kdbus_clone_userns_test(const char *bus,
+				   struct kdbus_conn *conn)
+{
+	int ret, status, efd;
+	pid_t pid, ppid;
+	uint64_t unpriv_conn_id, userns_conn_id;
+	struct kdbus_msg *msg;
+	const struct kdbus_item *item;
+	struct kdbus_pids expected_pids;
+	struct kdbus_conn *monitor;
+
+	kdbus_printf("STARTING TEST 'metadata-ns'.\n");
+
+	monitor = kdbus_hello(bus, KDBUS_HELLO_MONITOR, NULL, 0);
+	ASSERT_EXIT(monitor);
+
+	/*
+	 * The parent will signal the child that is in its own userns
+	 * to read its queue.
+	 */
+	efd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_RETURN_VAL(efd >= 0, efd);
+
+	ppid = getppid();
+
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, -errno);
+
+	if (pid == 0) {
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		ASSERT_EXIT_VAL(ret == 0, -errno);
+
+		ret = __kdbus_clone_userns_test(bus, conn, ppid, efd);
+		_exit(ret);
+	}
+
+	/* Phase 1) privileged receives from unprivileged */
+
+	/*
+	 * Receive from the unprivileged child
+	 */
+	kdbus_printf("\nUnprivileged → privileged (same namespaces):\n");
+	ret = kdbus_msg_recv_poll(conn, 300, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	unpriv_conn_id = msg->src_id;
+
+	/* Unprivileged user */
+	ret = kdbus_match_kdbus_creds(msg, &unmapped_creds);
+	ASSERT_RETURN(ret == 0);
+
+	/* Set the expected creds_pids */
+	expected_pids = (struct kdbus_pids) {
+		.pid = pid,
+		.tid = pid,
+		.ppid = getpid(),
+	};
+	ret = kdbus_match_kdbus_pids(msg, &expected_pids);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_msg_free(msg);
+
+	/*
+	 * Receive from the unprivileged child that is in its own
+	 * userns and pidns.
+	 */
+
+	kdbus_printf("\nUnprivileged/privileged in its userns → privileged "
+		     "(different userns and pidns)\n");
+	ret = kdbus_msg_recv_poll(conn, 300, &msg, NULL);
+	if (ret == -ETIMEDOUT)
+		/* perhaps unprivileged userns is not allowed */
+		goto wait;
+
+	ASSERT_RETURN(ret == 0);
+
+	userns_conn_id = msg->src_id;
+
+	item = kdbus_get_item(msg, KDBUS_ITEM_CAPS);
+	ASSERT_RETURN(item);
+
+	/*
+	 * Compare received items, creds must be translated into
+	 * the receiver user namespace, so the user is unprivileged
+	 */
+	ret = kdbus_match_kdbus_creds(msg, &unmapped_creds);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * We should have the kdbus_pids since we are in the parent
+	 * pidns.
+	 */
+	item = kdbus_get_item(msg, KDBUS_ITEM_PIDS);
+	ASSERT_RETURN(item);
+
+	ASSERT_RETURN(memcmp(&item->pids, &unmapped_pids,
+			     sizeof(struct kdbus_pids)) != 0);
+
+	/*
+	 * Parent pid of the unprivileged/privileged in its userns
+	 * is the unprivileged child pid that was forked here.
+	 */
+	ASSERT_RETURN((uint64_t)pid == item->pids.ppid);
+
+	kdbus_msg_free(msg);
+
+	/* Phase 2) The privileged connection now sends 3 packets */
+
+	/*
+	 * Send a unicast to the unprivileged connection.
+	 */
+	ret = kdbus_msg_send(conn, NULL, 0xdeadbeef, 0, 0,
+			     0, unpriv_conn_id);
+	ASSERT_RETURN(ret == 0);
+
+	/* signal to child that is in its userns */
+	ret = eventfd_write(efd, 1);
+	ASSERT_EXIT(ret == 0);
+
+	/*
+	 * Send a unicast to the connection that is unprivileged here
+	 * but privileged in its own userns.
+	 */
+	ret = kdbus_msg_send(conn, NULL, 0xdeadbeef, 0, 0,
+			     0, userns_conn_id);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Send a broadcast to the unprivileged connections.
+	 */
+	ret = kdbus_msg_send(conn, NULL, 0xdeadbeef, 0, 0,
+			     0, KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+wait:
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN(ret >= 0);
+
+	ASSERT_RETURN(WIFEXITED(status))
+	ASSERT_RETURN(!WEXITSTATUS(status));
+
+	/* Dump monitor queue */
+	kdbus_printf("\n\nMonitor queue:\n");
+	for (;;) {
+		ret = kdbus_msg_recv_poll(monitor, 100, &msg, NULL);
+		if (ret < 0)
+			break;
+
+		if (msg->payload_type == KDBUS_PAYLOAD_DBUS) {
+			/*
+			 * Parent pidns should see all the
+			 * pids
+			 */
+			item = kdbus_get_item(msg, KDBUS_ITEM_PIDS);
+			ASSERT_RETURN(item);
+
+			ASSERT_RETURN(item->pids.pid != 0 &&
+				      item->pids.tid != 0 &&
+				      item->pids.ppid != 0);
+		}
+
+		kdbus_msg_free(msg);
+	}
+
+	kdbus_conn_free(monitor);
+	close(efd);
+
+	return 0;
+}
+
+int kdbus_test_metadata_ns(struct kdbus_test_env *env)
+{
+	int ret;
+	struct kdbus_conn *holder, *conn;
+	struct kdbus_policy_access policy_access = {
+		/* Allow world so we can inspect metadata in namespace */
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	/*
+	 * We require user namespaces, and all uids/gids must be
+	 * mapped (strictly, only the ones used here are needed).
+	 */
+	if (!config_user_ns_is_enabled() ||
+	    !all_uids_gids_are_mapped())
+		return TEST_SKIP;
+
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, CAP_SYS_ADMIN, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	/* not enough privileges, SKIP test */
+	if (!ret)
+		return TEST_SKIP;
+
+	holder = kdbus_hello_registrar(env->buspath, "com.example.metadata",
+				       &policy_access, 1,
+				       KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(holder);
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	ret = kdbus_add_match_empty(conn);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(conn, "com.example.metadata", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	ret = kdbus_clone_userns_test(env->buspath, conn);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_conn_free(holder);
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-monitor.c linux-4.2-pck/tools/testing/selftests/kdbus/test-monitor.c
--- linux-4.2/tools/testing/selftests/kdbus/test-monitor.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-monitor.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,176 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/capability.h>
+#include <sys/wait.h>
+
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+#include "kdbus-test.h"
+
+int kdbus_test_monitor(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *monitor, *conn;
+	unsigned int cookie = 0xdeadbeef;
+	struct kdbus_msg *msg;
+	uint64_t offset = 0;
+	int ret;
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	/* add matches to make sure the monitor does not trigger an item
+	 * add or remove notification on connect and disconnect,
+	 * respectively.
+	 */
+	ret = kdbus_add_match_id(conn, 0x1, KDBUS_ITEM_ID_ADD,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_add_match_id(conn, 0x2, KDBUS_ITEM_ID_REMOVE,
+				 KDBUS_MATCH_ID_ANY);
+	ASSERT_RETURN(ret == 0);
+
+	/* register a monitor */
+	monitor = kdbus_hello(env->buspath, KDBUS_HELLO_MONITOR, NULL, 0);
+	ASSERT_RETURN(monitor);
+
+	/* make sure we did not receive a monitor connect notification */
+	ret = kdbus_msg_recv(conn, &msg, &offset);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/* check that a monitor cannot acquire a name */
+	ret = kdbus_name_acquire(monitor, "foo.bar.baz", NULL);
+	ASSERT_RETURN(ret == -EOPNOTSUPP);
+
+	ret = kdbus_msg_send(env->conn, NULL, cookie, 0, 0,  0, conn->id);
+	ASSERT_RETURN(ret == 0);
+
+	/* the recipient should have gotten the message */
+	ret = kdbus_msg_recv(conn, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+	kdbus_msg_free(msg);
+	kdbus_free(conn, offset);
+
+	/* and so should the monitor */
+	ret = kdbus_msg_recv(monitor, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+	kdbus_free(monitor, offset);
+
+	/* Installing matches for monitors must fail */
+	ret = kdbus_add_match_empty(monitor);
+	ASSERT_RETURN(ret == -EOPNOTSUPP);
+
+	cookie++;
+	ret = kdbus_msg_send(env->conn, NULL, cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	/* The monitor should get the message. */
+	ret = kdbus_msg_recv_poll(monitor, 100, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+	kdbus_free(monitor, offset);
+
+	/*
+	 * Since we are the only monitor, update the attach flags and
+	 * declare that we are not interested in any attach items on
+	 * receive.
+	 */
+
+	ret = kdbus_conn_update_attach_flags(monitor,
+					     _KDBUS_ATTACH_ALL,
+					     0);
+	ASSERT_RETURN(ret == 0);
+
+	cookie++;
+	ret = kdbus_msg_send(env->conn, NULL, cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv_poll(monitor, 100, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	ret = kdbus_item_in_message(msg, KDBUS_ITEM_TIMESTAMP);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_msg_free(msg);
+	kdbus_free(monitor, offset);
+
+	/*
+	 * Now we are interested in KDBUS_ITEM_TIMESTAMP and
+	 * KDBUS_ITEM_CREDS
+	 */
+	ret = kdbus_conn_update_attach_flags(monitor,
+					     _KDBUS_ATTACH_ALL,
+					     KDBUS_ATTACH_TIMESTAMP |
+					     KDBUS_ATTACH_CREDS);
+	ASSERT_RETURN(ret == 0);
+
+	cookie++;
+	ret = kdbus_msg_send(env->conn, NULL, cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv_poll(monitor, 100, &msg, &offset);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == cookie);
+
+	ret = kdbus_item_in_message(msg, KDBUS_ITEM_TIMESTAMP);
+	ASSERT_RETURN(ret == 1);
+
+	ret = kdbus_item_in_message(msg, KDBUS_ITEM_CREDS);
+	ASSERT_RETURN(ret == 1);
+
+	/* the KDBUS_ITEM_PID_COMM was not requested */
+	ret = kdbus_item_in_message(msg, KDBUS_ITEM_PID_COMM);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_msg_free(msg);
+	kdbus_free(monitor, offset);
+
+	kdbus_conn_free(monitor);
+	/* make sure we did not receive a monitor disconnect notification */
+	ret = kdbus_msg_recv(conn, &msg, &offset);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	kdbus_conn_free(conn);
+
+	/* Make sure that becoming a monitor as an unprivileged user is not allowed */
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	if (ret && all_uids_gids_are_mapped()) {
+		ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_UID, ({
+			monitor = kdbus_hello(env->buspath,
+					      KDBUS_HELLO_MONITOR,
+					      NULL, 0);
+			ASSERT_EXIT(!monitor && errno == EPERM);
+
+			_exit(EXIT_SUCCESS);
+		}),
+		({ 0; }));
+		ASSERT_RETURN(ret == 0);
+	}
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-names.c linux-4.2-pck/tools/testing/selftests/kdbus/test-names.c
--- linux-4.2/tools/testing/selftests/kdbus/test-names.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-names.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,272 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <limits.h>
+#include <getopt.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+#include "kdbus-test.h"
+
+struct test_name {
+	const char *name;
+	__u64 owner_id;
+	__u64 flags;
+};
+
+static bool conn_test_names(const struct kdbus_conn *conn,
+			    const struct test_name *tests,
+			    unsigned int n_tests)
+{
+	struct kdbus_cmd_list cmd_list = {};
+	struct kdbus_info *name, *list;
+	unsigned int i;
+	int ret;
+
+	cmd_list.size = sizeof(cmd_list);
+	cmd_list.flags = KDBUS_LIST_NAMES |
+			 KDBUS_LIST_ACTIVATORS |
+			 KDBUS_LIST_QUEUED;
+
+	ret = kdbus_cmd_list(conn->fd, &cmd_list);
+	ASSERT_RETURN(ret == 0);
+
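+	/*
+	 * The returned list lives in this connection's pool at the
+	 * offset reported by the list command.
+	 */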
+	list = (struct kdbus_info *)(conn->buf + cmd_list.offset);
+
+	for (i = 0; i < n_tests; i++) {
+		const struct test_name *t = tests + i;
+		bool found = false;
+
+		KDBUS_FOREACH(name, list, cmd_list.list_size) {
+			struct kdbus_item *item;
+
+			KDBUS_ITEM_FOREACH(item, name, items) {
+				if (item->type != KDBUS_ITEM_OWNED_NAME ||
+				    strcmp(item->name.name, t->name) != 0)
+					continue;
+
+				if (t->owner_id == name->id &&
+				    t->flags == item->name.flags) {
+					found = true;
+					break;
+				}
+			}
+		}
+
+		if (!found)
+			return false;
+	}
+
+	return true;
+}
+
+static bool conn_is_name_primary_owner(const struct kdbus_conn *conn,
+				       const char *needle)
+{
+	struct test_name t = {
+		.name = needle,
+		.owner_id = conn->id,
+		.flags = KDBUS_NAME_PRIMARY,
+	};
+
+	return conn_test_names(conn, &t, 1);
+}
+
+int kdbus_test_name_basic(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	char *name, *dot_name, *invalid_name, *wildcard_name;
+	int ret;
+
+	name = "foo.bla.blaz";
+	dot_name = ".bla.blaz";
+	invalid_name = "foo";
+	wildcard_name = "foo.bla.bl.*";
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* acquire the name "foo.bar.xxx" */
+	ret = kdbus_name_acquire(conn, "foo.bar.xxx", NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* Name is not valid, must fail */
+	ret = kdbus_name_acquire(env->conn, dot_name, NULL);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	ret = kdbus_name_acquire(env->conn, invalid_name, NULL);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	ret = kdbus_name_acquire(env->conn, wildcard_name, NULL);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	/* check that we can acquire a name */
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = conn_is_name_primary_owner(env->conn, name);
+	ASSERT_RETURN(ret == true);
+
+	/* ... and release it again */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	ret = conn_is_name_primary_owner(env->conn, name);
+	ASSERT_RETURN(ret == false);
+
+	/* check that we can't release it again */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	/* check that we can't release a name that we don't own */
+	ret = kdbus_name_release(env->conn, "foo.bar.xxx");
+	ASSERT_RETURN(ret == -EADDRINUSE);
+
+	/* Name is not valid, must fail */
+	ret = kdbus_name_release(env->conn, dot_name);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	ret = kdbus_name_release(env->conn, invalid_name);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	ret = kdbus_name_release(env->conn, wildcard_name);
+	ASSERT_RETURN(ret == -ESRCH);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+int kdbus_test_name_conflict(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	char *name;
+	int ret;
+
+	name = "foo.bla.blaz";
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* allow the new connection to own the same name */
+	/* acquire name from the 1st connection */
+	ret = kdbus_name_acquire(env->conn, name, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = conn_is_name_primary_owner(env->conn, name);
+	ASSERT_RETURN(ret == true);
+
+	/* check that we can't acquire it from the 2nd connection either */
+	ret = kdbus_name_acquire(conn, name, NULL);
+	ASSERT_RETURN(ret == -EEXIST);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+int kdbus_test_name_queue(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	struct test_name t[2];
+	const char *name;
+	uint64_t flags;
+	int ret;
+
+	name = "foo.bla.blaz";
+
+	flags = 0;
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* allow the new connection to own the same name */
+	/* acquire name from the 1st connection */
+	ret = kdbus_name_acquire(env->conn, name, &flags);
+	ASSERT_RETURN(ret == 0);
+
+	ret = conn_is_name_primary_owner(env->conn, name);
+	ASSERT_RETURN(ret == true);
+
+	/* queue the 2nd connection as waiting owner */
+	flags = KDBUS_NAME_QUEUE;
+	ret = kdbus_name_acquire(conn, name, &flags);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(flags & KDBUS_NAME_IN_QUEUE);
+
+	t[0].name = name;
+	t[0].owner_id = env->conn->id;
+	t[0].flags = KDBUS_NAME_PRIMARY;
+	t[1].name = name;
+	t[1].owner_id = conn->id;
+	t[1].flags = KDBUS_NAME_QUEUE | KDBUS_NAME_IN_QUEUE;
+	ret = conn_test_names(conn, t, 2);
+	ASSERT_RETURN(ret == true);
+
+	/* release name from 1st connection */
+	ret = kdbus_name_release(env->conn, name);
+	ASSERT_RETURN(ret == 0);
+
+	/* now the name should be owned by the 2nd connection */
+	t[0].name = name;
+	t[0].owner_id = conn->id;
+	t[0].flags = KDBUS_NAME_PRIMARY | KDBUS_NAME_QUEUE;
+	ret = conn_test_names(conn, t, 1);
+	ASSERT_RETURN(ret == true);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
+
+int kdbus_test_name_takeover(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn;
+	struct test_name t;
+	const char *name;
+	uint64_t flags;
+	int ret;
+
+	name = "foo.bla.blaz";
+
+	flags = KDBUS_NAME_ALLOW_REPLACEMENT;
+
+	/* create a 2nd connection */
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn != NULL);
+
+	/* acquire name for 1st connection */
+	ret = kdbus_name_acquire(env->conn, name, &flags);
+	ASSERT_RETURN(ret == 0);
+
+	t.name = name;
+	t.owner_id = env->conn->id;
+	t.flags = KDBUS_NAME_ALLOW_REPLACEMENT | KDBUS_NAME_PRIMARY;
+	ret = conn_test_names(conn, &t, 1);
+	ASSERT_RETURN(ret == true);
+
+	/* now steal name with 2nd connection */
+	flags = KDBUS_NAME_REPLACE_EXISTING;
+	ret = kdbus_name_acquire(conn, name, &flags);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(flags & KDBUS_NAME_ACQUIRED);
+
+	ret = conn_is_name_primary_owner(conn, name);
+	ASSERT_RETURN(ret == true);
+
+	kdbus_conn_free(conn);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-policy-ns.c linux-4.2-pck/tools/testing/selftests/kdbus/test-policy-ns.c
--- linux-4.2/tools/testing/selftests/kdbus/test-policy-ns.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-policy-ns.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,632 @@
+/*
+ * Test metadata and policies in new namespaces. Even though our tests
+ * can run in a namespaced setup, this test is necessary so we can
+ * inspect policies on the same kdbusfs but across multiple
+ * namespaces.
+ *
+ * Copyright (C) 2014-2015 Djalal Harouni
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/eventfd.h>
+#include <sys/syscall.h>
+#include <sys/capability.h>
+#include <linux/sched.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+#define MAX_CONN	64
+#define POLICY_NAME	"foo.test.policy-test"
+
+#define KDBUS_CONN_MAX_MSGS_PER_USER            16
+
+/**
+ * Note: this test can be used to inspect policy_db->talk_access_hash
+ *
+ * The purpose of these tests:
+ * 1) Check KDBUS_POLICY_TALK
+ * 2) Check the cache state: kdbus_policy_db->talk_access_hash
+ * This should be extended over time.
+ */
+
+/**
+ * Check a list of connections against conn_db[0].
+ * conn_db[0] will own the name "foo.test.policy-test", and the
+ * policy holder connection for this name will update the policy
+ * entries so that different use cases can be tested.
+ */
+static struct kdbus_conn **conn_db;
+
+static void *kdbus_recv_echo(void *ptr)
+{
+	int ret;
+	struct kdbus_conn *conn = ptr;
+
+	ret = kdbus_msg_recv_poll(conn, 200, NULL, NULL);
+
+	return (void *)(long)ret;
+}
+
+/* Trigger kdbus_policy_set() */
+static int kdbus_set_policy_talk(struct kdbus_conn *conn,
+				 const char *name,
+				 uid_t id, unsigned int type)
+{
+	int ret;
+	struct kdbus_policy_access access = {
+		.type = type,
+		.id = id,
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(conn, name, &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	return TEST_OK;
+}
+
+/* Return TEST_OK on success or TEST_ERR on failure */
+static int kdbus_register_same_activator(char *bus, const char *name,
+					 struct kdbus_conn **c)
+{
+	int ret;
+	struct kdbus_conn *activator;
+
+	activator = kdbus_hello_activator(bus, name, NULL, 0);
+	if (activator) {
+		*c = activator;
+		fprintf(stderr, "--- error: was able to register name '%s' twice.\n",
+			name);
+		return TEST_ERR;
+	}
+
+	ret = -errno;
+	/* -EEXIST means test succeeded */
+	if (ret == -EEXIST)
+		return TEST_OK;
+
+	return TEST_ERR;
+}
+
+/* Return TEST_OK on success or TEST_ERR on failure */
+static int kdbus_register_policy_holder(char *bus, const char *name,
+					struct kdbus_conn **conn)
+{
+	struct kdbus_conn *c;
+	struct kdbus_policy_access access[2];
+
+	access[0].type = KDBUS_POLICY_ACCESS_USER;
+	access[0].access = KDBUS_POLICY_OWN;
+	access[0].id = geteuid();
+
+	access[1].type = KDBUS_POLICY_ACCESS_WORLD;
+	access[1].access = KDBUS_POLICY_TALK;
+	access[1].id = geteuid();
+
+	c = kdbus_hello_registrar(bus, name, access, 2,
+				  KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(c);
+
+	*conn = c;
+
+	return TEST_OK;
+}
+
+/**
+ * Create new threads for receiving from multiple senders.
+ * The 'conn_db' will be populated with the newly created connections.
+ * The caller should free all allocated connections.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int kdbus_recv_in_threads(const char *bus, const char *name,
+				 struct kdbus_conn **conn_db)
+{
+	int ret;
+	bool pool_full = false;
+	unsigned int sent_packets = 0;
+	unsigned int lost_packets = 0;
+	unsigned int i, tid;
+	unsigned long dst_id;
+	unsigned long cookie = 1;
+	unsigned int thread_nr = MAX_CONN - 1;
+	pthread_t thread_id[MAX_CONN - 1] = {'\0'};
+
+	dst_id = name ? KDBUS_DST_ID_NAME : conn_db[0]->id;
+
+	for (tid = 0, i = 1; tid < thread_nr; tid++, i++) {
+		ret = pthread_create(&thread_id[tid], NULL,
+				     kdbus_recv_echo, (void *)conn_db[0]);
+		if (ret < 0) {
+			ret = -errno;
+			kdbus_printf("error pthread_create: %d (%m)\n",
+				      ret);
+			break;
+		}
+
+		/* just free before re-using */
+		kdbus_conn_free(conn_db[i]);
+		conn_db[i] = NULL;
+
+		/* We need to create connections here */
+		conn_db[i] = kdbus_hello(bus, 0, NULL, 0);
+		if (!conn_db[i]) {
+			ret = -errno;
+			break;
+		}
+
+		ret = kdbus_add_match_empty(conn_db[i]);
+		if (ret < 0)
+			break;
+
+		ret = kdbus_msg_send(conn_db[i], name, cookie++,
+				     0, 0, 0, dst_id);
+		if (ret < 0) {
+			/*
+			 * The receivers are not reading their
+			 * messages, probably because they were not
+			 * scheduled yet.
+			 *
+			 * So mark the pool as full here; perhaps the
+			 * connection pool or queue was full. The
+			 * receiver errors are rechecked below.
+			 */
+			if (ret == -ENOBUFS || ret == -EXFULL)
+				pool_full = true;
+			break;
+		}
+
+		sent_packets++;
+	}
+
+	for (tid = 0; tid < thread_nr; tid++) {
+		int thread_ret = 0;
+
+		if (thread_id[tid]) {
+			pthread_join(thread_id[tid], (void *)&thread_ret);
+			if (thread_ret < 0) {
+				/* Update only if send did not fail */
+				if (ret == 0)
+					ret = thread_ret;
+
+				lost_packets++;
+			}
+		}
+	}
+
+	/*
+	 * If sending failed with -ENOBUFS or -EXFULL, then we should
+	 * have recorded lost packets and sent_packets should be at
+	 * least KDBUS_CONN_MAX_MSGS_PER_USER.
+	 */
+	if (pool_full) {
+		ASSERT_RETURN(lost_packets > 0);
+
+		/*
+		 * We should have sent at least
+		 * KDBUS_CONN_MAX_MSGS_PER_USER messages.
+		 *
+		 * For every send operation we create a thread to
+		 * receive the packet, so the queue is kept clean.
+		 */
+		ASSERT_RETURN(sent_packets >= KDBUS_CONN_MAX_MSGS_PER_USER);
+
+		/*
+		 * Set ret to zero since we only failed because the
+		 * receiving threads had not been scheduled.
+		 */
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/* Return: TEST_OK or TEST_ERR on failure */
+static int kdbus_normal_test(const char *bus, const char *name,
+			     struct kdbus_conn **conn_db)
+{
+	int ret;
+
+	ret = kdbus_recv_in_threads(bus, name, conn_db);
+	ASSERT_RETURN(ret >= 0);
+
+	return TEST_OK;
+}
+
+static int kdbus_fork_test_by_id(const char *bus,
+				 struct kdbus_conn **conn_db,
+				 int parent_status, int child_status)
+{
+	int ret;
+	pid_t pid;
+	uint64_t cookie = 0x9876ecba;
+	struct kdbus_msg *msg = NULL;
+	uint64_t offset = 0;
+	int status = 0;
+
+	/*
+	 * If the child_status is not EXIT_SUCCESS, then we expect
+	 * that sending from the child will fail, thus receiving
+	 * from parent must error with -ETIMEDOUT, and vice versa.
+	 */
+	bool parent_timedout = !!child_status;
+	bool child_timedout = !!parent_status;
+
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		struct kdbus_conn *conn_src;
+
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		ASSERT_EXIT(ret == 0);
+
+		ret = drop_privileges(65534, 65534);
+		ASSERT_EXIT(ret == 0);
+
+		conn_src = kdbus_hello(bus, 0, NULL, 0);
+		ASSERT_EXIT(conn_src);
+
+		ret = kdbus_add_match_empty(conn_src);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * child_status is always checked against send
+		 * operations; if a send returns an unexpected value,
+		 * return EXIT_FAILURE.
+		 */
+		ret = kdbus_msg_send(conn_src, NULL, cookie,
+				     0, 0, 0, conn_db[0]->id);
+		ASSERT_EXIT(ret == child_status);
+
+		ret = kdbus_msg_recv_poll(conn_src, 100, NULL, NULL);
+
+		kdbus_conn_free(conn_src);
+
+		/*
+		 * Child kdbus_msg_recv_poll() should timeout since
+		 * the parent_status was set to a non EXIT_SUCCESS
+		 * value.
+		 */
+		if (child_timedout)
+			_exit(ret == -ETIMEDOUT ? EXIT_SUCCESS : EXIT_FAILURE);
+
+		_exit(ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+	}
+
+	ret = kdbus_msg_recv_poll(conn_db[0], 100, &msg, &offset);
+	/*
+	 * If parent_timedout is set then this should fail with
+	 * -ETIMEDOUT since the child_status was set to a non
+	 * EXIT_SUCCESS value. Otherwise, assume
+	 * that kdbus_msg_recv_poll() has succeeded.
+	 */
+	if (parent_timedout) {
+		ASSERT_RETURN_VAL(ret == -ETIMEDOUT, TEST_ERR);
+
+		/* Timed out; there is no need to continue since we
+		 * don't have the child connection ID, so just
+		 * terminate. */
+		goto out;
+	} else {
+		ASSERT_RETURN_VAL(ret == 0, ret);
+	}
+
+	ret = kdbus_msg_send(conn_db[0], NULL, ++cookie,
+			     0, 0, 0, msg->src_id);
+	/*
+	 * parent_status is checked against send operations,
+	 * on failures always return TEST_ERR.
+	 */
+	ASSERT_RETURN_VAL(ret == parent_status, TEST_ERR);
+
+	kdbus_msg_free(msg);
+	kdbus_free(conn_db[0], offset);
+
+out:
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+/*
+ * Return: TEST_OK, TEST_ERR or TEST_SKIP.
+ * We return TEST_OK only if the children exit with the
+ * 'expected_status' specified as an argument.
+ */
+static int kdbus_fork_test(const char *bus, const char *name,
+			   struct kdbus_conn **conn_db, int expected_status)
+{
+	pid_t pid;
+	int ret = 0;
+	int status = 0;
+
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		ASSERT_EXIT(ret == 0);
+
+		ret = drop_privileges(65534, 65534);
+		ASSERT_EXIT(ret == 0);
+
+		ret = kdbus_recv_in_threads(bus, name, conn_db);
+		_exit(ret == expected_status ? EXIT_SUCCESS : EXIT_FAILURE);
+	}
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN(ret >= 0);
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+/* Return EXIT_SUCCESS, EXIT_FAILURE or negative errno */
+static int __kdbus_clone_userns_test(const char *bus,
+				     const char *name,
+				     struct kdbus_conn **conn_db,
+				     int expected_status)
+{
+	int efd;
+	pid_t pid;
+	int ret = 0;
+	unsigned int uid = 65534;
+	int status;
+
+	ret = drop_privileges(uid, uid);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	/*
+	 * Since we just dropped privileges, the dumpable flag was
+	 * cleared, which causes /proc/$clone_child/uid_map to be
+	 * owned by root; any userns uid mapping would then fail with
+	 * -EPERM since the mapping is done by uid 65534.
+	 *
+	 * To avoid this, set the dumpable flag again, which makes
+	 * procfs update the owner of the /proc/$clone_child/ inodes
+	 * to 65534.
+	 *
+	 * This way we can write to /proc/$clone_child/uid_map as uid
+	 * 65534 and map the uid 65534 to 0 inside the user namespace.
+	 */
+	ret = prctl(PR_SET_DUMPABLE, SUID_DUMP_USER);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	/* sync parent/child */
+	efd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_RETURN_VAL(efd >= 0, efd);
+
+	pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWUSER, NULL);
+	if (pid < 0) {
+		ret = -errno;
+		kdbus_printf("error clone: %d (%m)\n", ret);
+		/*
+		 * A normal user may not be allowed to create a user
+		 * namespace, so this is not necessarily an error.
+		 */
+		if (ret == -EPERM) {
+			kdbus_printf("-- CLONE_NEWUSER TEST Failed for uid: %u\n"
+				"-- Make sure that your kernel does not allow "
+				"CLONE_NEWUSER for unprivileged users\n"
+				"-- Upstream Commit: "
+				"https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=5eaf563e\n",
+				uid);
+			ret = 0;
+		}
+
+		return ret;
+	}
+
+	if (pid == 0) {
+		struct kdbus_conn *conn_src;
+		eventfd_t event_status = 0;
+
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		ASSERT_EXIT(ret == 0);
+
+		ret = eventfd_read(efd, &event_status);
+		ASSERT_EXIT(ret >= 0 && event_status == 1);
+
+		/* ping connection from the new user namespace */
+		conn_src = kdbus_hello(bus, 0, NULL, 0);
+		ASSERT_EXIT(conn_src);
+
+		ret = kdbus_add_match_empty(conn_src);
+		ASSERT_EXIT(ret == 0);
+
+		ret = kdbus_msg_send(conn_src, name, 0xabcd1234,
+				     0, 0, 0, KDBUS_DST_ID_NAME);
+		kdbus_conn_free(conn_src);
+
+		_exit(ret == expected_status ? EXIT_SUCCESS : EXIT_FAILURE);
+	}
+
+	ret = userns_map_uid_gid(pid, "0 65534 1", "0 65534 1");
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	/* Tell child we are ready */
+	ret = eventfd_write(efd, 1);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	close(efd);
+
+	return status == EXIT_SUCCESS ? TEST_OK : TEST_ERR;
+}
+
+static int kdbus_clone_userns_test(const char *bus,
+				   const char *name,
+				   struct kdbus_conn **conn_db,
+				   int expected_status)
+{
+	pid_t pid;
+	int ret = 0;
+	int status;
+
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, -errno);
+
+	if (pid == 0) {
+		ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (ret < 0)
+			_exit(EXIT_FAILURE);
+
+		ret = __kdbus_clone_userns_test(bus, name, conn_db,
+						expected_status);
+		_exit(ret);
+	}
+
+	/*
+	 * Receive in the original (root privileged) user namespace,
+	 * must fail with -ETIMEDOUT.
+	 */
+	ret = kdbus_msg_recv_poll(conn_db[0], 100, NULL, NULL);
+	ASSERT_RETURN_VAL(ret == -ETIMEDOUT, ret);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+int kdbus_test_policy_ns(struct kdbus_test_env *env)
+{
+	int i;
+	int ret;
+	struct kdbus_conn *activator = NULL;
+	struct kdbus_conn *policy_holder = NULL;
+	char *bus = env->buspath;
+
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	/* not enough privileges, SKIP test */
+	if (!ret)
+		return TEST_SKIP;
+
+	/* we require user-namespaces */
+	if (access("/proc/self/uid_map", F_OK) != 0)
+		return TEST_SKIP;
+
+	/* uids/gids must be mapped */
+	if (!all_uids_gids_are_mapped())
+		return TEST_SKIP;
+
+	conn_db = calloc(MAX_CONN, sizeof(struct kdbus_conn *));
+	ASSERT_RETURN(conn_db);
+
+	memset(conn_db, 0, MAX_CONN * sizeof(struct kdbus_conn *));
+
+	conn_db[0] = kdbus_hello(bus, 0, NULL, 0);
+	ASSERT_RETURN(conn_db[0]);
+
+	ret = kdbus_add_match_empty(conn_db[0]);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_fork_test_by_id(bus, conn_db, -EPERM, -EPERM);
+	ASSERT_EXIT(ret == 0);
+
+	ret = kdbus_register_policy_holder(bus, POLICY_NAME,
+					   &policy_holder);
+	ASSERT_RETURN(ret == 0);
+
+	/* Try to register the same name with an activator */
+	ret = kdbus_register_same_activator(bus, POLICY_NAME,
+					    &activator);
+	ASSERT_RETURN(ret == 0);
+
+	/* Acquire POLICY_NAME */
+	ret = kdbus_name_acquire(conn_db[0], POLICY_NAME, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_normal_test(bus, POLICY_NAME, conn_db);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_list(conn_db[0], KDBUS_LIST_NAMES |
+				     KDBUS_LIST_UNIQUE |
+				     KDBUS_LIST_ACTIVATORS |
+				     KDBUS_LIST_QUEUED);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_fork_test(bus, POLICY_NAME, conn_db, EXIT_SUCCESS);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Child connections are able to talk to conn_db[0] since the
+	 * current POLICY_NAME TALK type is KDBUS_POLICY_ACCESS_WORLD,
+	 * so expect EXIT_SUCCESS when sending from the child. The
+	 * child's connection does not own any well-known name, so the
+	 * parent connection conn_db[0] would normally fail with
+	 * -EPERM, but since it is a privileged bus user the TALK is
+	 * allowed.
+	 */
+	ret = kdbus_fork_test_by_id(bus, conn_db,
+				    EXIT_SUCCESS, EXIT_SUCCESS);
+	ASSERT_EXIT(ret == 0);
+
+	/*
+	 * The connections that could talk are perhaps being destroyed
+	 * now. Restrict the policy and purge cache entries where
+	 * conn_db[0] is the destination.
+	 *
+	 * Now only connections with uid == 0 are allowed to talk.
+	 */
+	ret = kdbus_set_policy_talk(policy_holder, POLICY_NAME,
+				    geteuid(), KDBUS_POLICY_ACCESS_USER);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test connections (FORK+DROP) again:
+	 * after setting the policy, re-check the connections;
+	 * we expect the children to fail with -EPERM.
+	 */
+	ret = kdbus_fork_test(bus, POLICY_NAME, conn_db, -EPERM);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Now expect both parent and child to fail.
+	 *
+	 * The child should fail with -EPERM since we just restricted
+	 * the POLICY_NAME TALK access to uid 0 and its uid is 65534.
+	 *
+	 * Since the parent's connection will time out when receiving
+	 * from the child, we never get further; FWIW just pass -EPERM.
+	 */
+	ret = kdbus_fork_test_by_id(bus, conn_db, -EPERM, -EPERM);
+	ASSERT_EXIT(ret == 0);
+
+	/* Check if the name can be reached in a new userns */
+	ret = kdbus_clone_userns_test(bus, POLICY_NAME, conn_db, -EPERM);
+	ASSERT_RETURN(ret == 0);
+
+	for (i = 0; i < MAX_CONN; i++)
+		kdbus_conn_free(conn_db[i]);
+
+	kdbus_conn_free(activator);
+	kdbus_conn_free(policy_holder);
+
+	free(conn_db);
+
+	return ret;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-policy-priv.c linux-4.2-pck/tools/testing/selftests/kdbus/test-policy-priv.c
--- linux-4.2/tools/testing/selftests/kdbus/test-policy-priv.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-policy-priv.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,1285 @@
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/capability.h>
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+static int test_policy_priv_by_id(const char *bus,
+				  struct kdbus_conn *conn_dst,
+				  bool drop_second_user,
+				  int parent_status,
+				  int child_status)
+{
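+	/*
+	 * An unprivileged child sends a message to conn_dst by ID and
+	 * expects child_status from the send; the parent then polls
+	 * conn_dst and expects parent_status from the receive.
+	 */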
+	int ret = 0;
+	uint64_t expected_cookie = time(NULL) ^ 0xdeadbeef;
+
+	ASSERT_RETURN(conn_dst);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, bus, ({
+		ret = kdbus_msg_send(unpriv, NULL,
+				     expected_cookie, 0, 0, 0,
+				     conn_dst->id);
+		ASSERT_EXIT(ret == child_status);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_dst, 300, NULL, NULL);
+	ASSERT_RETURN(ret == parent_status);
+
+	return 0;
+}
+
+static int test_policy_priv_by_broadcast(const char *bus,
+					 struct kdbus_conn *conn_dst,
+					 int drop_second_user,
+					 int parent_status,
+					 int child_status)
+{
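+	/*
+	 * drop_second_user selects the second peer of this broadcast
+	 * test: DO_NOT_DROP uses the privileged conn_dst as-is,
+	 * DROP_SAME_UNPRIV creates it under the same unprivileged
+	 * uid/gid as the first child, and DROP_OTHER_UNPRIV creates
+	 * it under a different unprivileged uid/gid.
+	 */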
+	int efd;
+	int ret = 0;
+	eventfd_t event_status = 0;
+	struct kdbus_msg *msg = NULL;
+	uid_t second_uid = UNPRIV_UID;
+	gid_t second_gid = UNPRIV_GID;
+	struct kdbus_conn *child_2 = conn_dst;
+	uint64_t expected_cookie = time(NULL) ^ 0xdeadbeef;
+
+	/* Drop to an unprivileged user other than UNPRIV_UID */
+	if (drop_second_user == DROP_OTHER_UNPRIV) {
+		second_uid = UNPRIV_UID - 1;
+		second_gid = UNPRIV_GID - 1;
+	}
+
+	/* child will signal parent to send broadcast */
+	efd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_RETURN_VAL(efd >= 0, efd);
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_GID, ({
+		struct kdbus_conn *child;
+
+		child = kdbus_hello(bus, 0, NULL, 0);
+		ASSERT_EXIT(child);
+
+		ret = kdbus_add_match_empty(child);
+		ASSERT_EXIT(ret == 0);
+
+		/* signal parent */
+		ret = eventfd_write(efd, 1);
+		ASSERT_EXIT(ret == 0);
+
+		/* Use a slightly longer timeout */
+		ret = kdbus_msg_recv_poll(child, 500, &msg, NULL);
+		ASSERT_EXIT(ret == child_status);
+
+		/*
+		 * If we expect the child to get the broadcast
+		 * message, then check the received cookie.
+		 */
+		if (ret == 0) {
+			ASSERT_EXIT(expected_cookie == msg->cookie);
+		}
+
+		/* Use expected_cookie since 'msg' might be NULL */
+		ret = kdbus_msg_send(child, NULL, expected_cookie + 1,
+				     0, 0, 0, KDBUS_DST_ID_BROADCAST);
+		ASSERT_EXIT(ret == 0);
+
+		kdbus_msg_free(msg);
+		kdbus_conn_free(child);
+	}),
+	({
+		if (drop_second_user == DO_NOT_DROP) {
+			ASSERT_RETURN(child_2);
+
+			ret = eventfd_read(efd, &event_status);
+			ASSERT_RETURN(ret >= 0 && event_status == 1);
+
+			ret = kdbus_msg_send(child_2, NULL,
+					     expected_cookie, 0, 0, 0,
+					     KDBUS_DST_ID_BROADCAST);
+			ASSERT_RETURN(ret == 0);
+
+			/* drop own broadcast */
+			ret = kdbus_msg_recv(child_2, &msg, NULL);
+			ASSERT_RETURN(ret == 0);
+			ASSERT_RETURN(msg->src_id == child_2->id);
+			kdbus_msg_free(msg);
+
+			/* Use a slightly longer timeout */
+			ret = kdbus_msg_recv_poll(child_2, 1000,
+						  &msg, NULL);
+			ASSERT_RETURN(ret == parent_status);
+
+			/*
+			 * Check returned cookie in case we expect
+			 * success.
+			 */
+			if (ret == 0) {
+				ASSERT_RETURN(msg->cookie ==
+					      expected_cookie + 1);
+			}
+
+			kdbus_msg_free(msg);
+		} else {
+			/*
+			 * Two unprivileged users will try to
+			 * communicate using broadcast.
+			 */
+			ret = RUN_UNPRIVILEGED(second_uid, second_gid, ({
+				child_2 = kdbus_hello(bus, 0, NULL, 0);
+				ASSERT_EXIT(child_2);
+
+				ret = kdbus_add_match_empty(child_2);
+				ASSERT_EXIT(ret == 0);
+
+				ret = eventfd_read(efd, &event_status);
+				ASSERT_EXIT(ret >= 0 && event_status == 1);
+
+				ret = kdbus_msg_send(child_2, NULL,
+						expected_cookie, 0, 0, 0,
+						KDBUS_DST_ID_BROADCAST);
+				ASSERT_EXIT(ret == 0);
+
+				/* drop own broadcast */
+				ret = kdbus_msg_recv(child_2, &msg, NULL);
+				ASSERT_RETURN(ret == 0);
+				ASSERT_RETURN(msg->src_id == child_2->id);
+				kdbus_msg_free(msg);
+
+				/* Use a slightly longer timeout */
+				ret = kdbus_msg_recv_poll(child_2, 1000,
+							  &msg, NULL);
+				ASSERT_EXIT(ret == parent_status);
+
+				/*
+				 * Check returned cookie in case we expect
+				 * success.
+				 */
+				if (ret == 0) {
+					ASSERT_EXIT(msg->cookie ==
+						    expected_cookie + 1);
+				}
+
+				kdbus_msg_free(msg);
+				kdbus_conn_free(child_2);
+			}),
+			({ 0; }));
+			ASSERT_RETURN(ret == 0);
+		}
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	close(efd);
+
+	return ret;
+}
+
+static void nosig(int sig)
+{
+}
+
+static int test_priv_before_policy_upload(struct kdbus_test_env *env)
+{
+	int ret = 0;
+	struct kdbus_conn *conn;
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	/*
+	 * Make sure an unprivileged bus user cannot acquire names
+	 * before any policy holder has been registered.
+	 */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret < 0);
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Make sure unprivileged bus users cannot talk by default
+	 * to privileged ones, unless a policy holder that allows
+	 * this was uploaded.
+	 */
+
+	ret = test_policy_priv_by_id(env->buspath, conn, false,
+				     -ETIMEDOUT, -EPERM);
+	ASSERT_RETURN(ret == 0);
+
+	/* Activate matching for a privileged connection */
+	ret = kdbus_add_match_empty(conn);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * First make sure that BROADCAST with msg flag
+	 * KDBUS_MSG_EXPECT_REPLY will fail with -ENOTUNIQ
+	 */
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, NULL, 0xdeadbeef,
+				     KDBUS_MSG_EXPECT_REPLY,
+				     5000000000ULL, 0,
+				     KDBUS_DST_ID_BROADCAST);
+		ASSERT_EXIT(ret == -ENOTUNIQ);
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test broadcast with a privileged connection.
+	 *
+	 * The first unprivileged receiver should not get the
+	 * broadcast message sent by the privileged connection, since
+	 * there is no TALK policy that allows the unprivileged
+	 * connection to TALK to the privileged one; it will fail with
+	 * -ETIMEDOUT.
+	 *
+	 * Second case:
+	 * The privileged connection should get the broadcast message
+	 * from the unprivileged one. Since the receiver is a
+	 * privileged bus user, it has default TALK access to all
+	 * connections and will receive it.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, conn,
+					    DO_NOT_DROP,
+					    0, -ETIMEDOUT);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test broadcast with two unprivileged connections running
+	 * under the same user.
+	 *
+	 * Both connections should succeed.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, NULL,
+					    DROP_SAME_UNPRIV, 0, 0);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test broadcast with two unprivileged connections running
+	 * under different users.
+	 *
+	 * Both connections will fail with -ETIMEDOUT.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, NULL,
+					    DROP_OTHER_UNPRIV,
+					    -ETIMEDOUT, -ETIMEDOUT);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_conn_free(conn);
+
+	return ret;
+}
+
+static int test_broadcast_after_policy_upload(struct kdbus_test_env *env)
+{
+	int ret;
+	int efd;
+	eventfd_t event_status = 0;
+	struct kdbus_msg *msg = NULL;
+	struct kdbus_conn *owner_a, *owner_b;
+	struct kdbus_conn *holder_a, *holder_b;
+	struct kdbus_policy_access access = {};
+	uint64_t expected_cookie = time(NULL) ^ 0xdeadbeef;
+
+	owner_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(owner_a);
+
+	ret = kdbus_name_acquire(owner_a, "com.example.broadcastA", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users cannot talk by default
+	 * to privileged ones, unless a policy holder that allows
+	 * this was uploaded.
+	 */
+
+	++expected_cookie;
+	ret = test_policy_priv_by_id(env->buspath, owner_a, false,
+				     -ETIMEDOUT, -EPERM);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Make sure that the privileged connection won't receive
+	 * broadcasts unless it installs a match; it will fail with
+	 * -ETIMEDOUT.
+	 *
+	 * At the same time, check that the unprivileged connection
+	 * will not receive the broadcast message from the privileged
+	 * one, since the privileged one owns a name with a restricted
+	 * TALK policy (actually the TALK policy is not registered
+	 * yet, so we fail by default), and thus the unprivileged
+	 * receiver is not able to TALK to that name.
+	 */
+
+	/* Activate matching for a privileged connection */
+	ret = kdbus_add_match_empty(owner_a);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Redo the previous test. The privileged conn owner_a is
+	 * able to TALK to any connection so it will receive the
+	 * broadcast message now.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, owner_a,
+					    DO_NOT_DROP,
+					    0, -ETIMEDOUT);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test that broadcasts between two unprivileged connections
+	 * running under the same user still succeed.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, NULL,
+					    DROP_SAME_UNPRIV, 0, 0);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test broadcast with two unprivileged connections running
+	 * under different users.
+	 *
+	 * Both connections will fail with -ETIMEDOUT.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, NULL,
+					    DROP_OTHER_UNPRIV,
+					    -ETIMEDOUT, -ETIMEDOUT);
+	ASSERT_RETURN(ret == 0);
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	holder_a = kdbus_hello_registrar(env->buspath,
+					 "com.example.broadcastA",
+					 &access, 1,
+					 KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(holder_a);
+
+	holder_b = kdbus_hello_registrar(env->buspath,
+					 "com.example.broadcastB",
+					 &access, 1,
+					 KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(holder_b);
+
+	/* Free connections and their received messages and restart */
+	kdbus_conn_free(owner_a);
+
+	owner_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(owner_a);
+
+	/* Activate matching for a privileged connection */
+	ret = kdbus_add_match_empty(owner_a);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_name_acquire(owner_a, "com.example.broadcastA", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	owner_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(owner_b);
+
+	ret = kdbus_name_acquire(owner_b, "com.example.broadcastB", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/* Activate matching for a privileged connection */
+	ret = kdbus_add_match_empty(owner_b);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Test that even though "com.example.broadcastA" and
+	 * "com.example.broadcastB" have restricted TALK access by
+	 * default, they are still able to signal each other using
+	 * broadcasts: as privileged connections they receive all
+	 * broadcasts their matches allow.
+	 */
+
+	++expected_cookie;
+	ret = kdbus_msg_send(owner_a, NULL, expected_cookie, 0,
+			     0, 0, KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv_poll(owner_a, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == expected_cookie);
+
+	/* Check src ID */
+	ASSERT_RETURN(msg->src_id == owner_a->id);
+
+	kdbus_msg_free(msg);
+
+	ret = kdbus_msg_recv_poll(owner_b, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+	ASSERT_RETURN(msg->cookie == expected_cookie);
+
+	/* Check src ID */
+	ASSERT_RETURN(msg->src_id == owner_a->id);
+
+	kdbus_msg_free(msg);
+
+	/* Release name "com.example.broadcastB" */
+
+	ret = kdbus_name_release(owner_b, "com.example.broadcastB");
+	ASSERT_EXIT(ret >= 0);
+
+	/* KDBUS_POLICY_OWN for unprivileged connections */
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	/* Update the policy so an unprivileged connection can own the name */
+
+	ret = kdbus_conn_update_policy(holder_b,
+				       "com.example.broadcastB",
+				       &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Send broadcasts from an unprivileged connection that
+	 * owns the name "com.example.broadcastB".
+	 *
+	 * We have four destinations here:
+	 *
+	 * 1) owner_a: a privileged connection that owns
+	 * "com.example.broadcastA". It will receive the broadcast
+	 * since it is privileged, has default TALK access to all
+	 * connections, and has a match installed.
+	 * Will succeed.
+	 *
+	 * 2) owner_b: a privileged connection (running under a
+	 * different uid) that does not own any name, but has an
+	 * empty broadcast match installed, so it will receive
+	 * broadcasts since it has default TALK access to all
+	 * connections.
+	 *
+	 * 3) unpriv_a: an unprivileged connection that does not
+	 * own any name. It will receive the broadcast since it
+	 * runs under the same user as the broadcaster and did
+	 * install a match. It should get the message.
+	 *
+	 * 4) unpriv_b: an unprivileged connection that is not
+	 * interested in broadcast messages, so it did not install
+	 * any broadcast match. Should fail with -ETIMEDOUT.
+	 */
+
+	++expected_cookie;
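+	/*
+	 * The eventfd below synchronizes the parent with the unprivileged
+	 * child: the child signals it right before broadcasting, and the
+	 * parent waits for that signal before polling for the messages.
+	 */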
+	efd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_RETURN_VAL(efd >= 0, efd);
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_UID, ({
+		struct kdbus_conn *unpriv_owner;
+		struct kdbus_conn *unpriv_a, *unpriv_b;
+
+		unpriv_owner = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_EXIT(unpriv_owner);
+
+		unpriv_a = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_EXIT(unpriv_a);
+
+		unpriv_b = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_EXIT(unpriv_b);
+
+		ret = kdbus_name_acquire(unpriv_owner,
+					 "com.example.broadcastB",
+					 NULL);
+		ASSERT_EXIT(ret >= 0);
+
+		ret = kdbus_add_match_empty(unpriv_a);
+		ASSERT_EXIT(ret == 0);
+
+		/* Signal that we are doing broadcasts */
+		ret = eventfd_write(efd, 1);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * Do a broadcast from a connection that owns the
+		 * name "com.example.broadcastB".
+		 */
+		ret = kdbus_msg_send(unpriv_owner, NULL,
+				     expected_cookie,
+				     0, 0, 0,
+				     KDBUS_DST_ID_BROADCAST);
+		ASSERT_EXIT(ret == 0);
+
+		/*
+		 * Unprivileged connection running under the same
+		 * user. It should succeed.
+		 */
+		ret = kdbus_msg_recv_poll(unpriv_a, 300, &msg, NULL);
+		ASSERT_EXIT(ret == 0 && msg->cookie == expected_cookie);
+
+		/*
+		 * Did not install matches, not interested in
+		 * broadcasts
+		 */
+		ret = kdbus_msg_recv_poll(unpriv_b, 300, NULL, NULL);
+		ASSERT_EXIT(ret == -ETIMEDOUT);
+	}),
+	({
+		ret = eventfd_read(efd, &event_status);
+		ASSERT_RETURN(ret >= 0 && event_status == 1);
+
+		/*
+		 * owner_a is privileged and has a match installed,
+		 * so it receives the broadcast even though TALK
+		 * access to its name "com.example.broadcastA" is
+		 * restricted.
+		 */
+		ret = kdbus_msg_recv_poll(owner_a, 300, &msg, NULL);
+		ASSERT_RETURN(ret == 0);
+
+		/* confirm the received cookie */
+		ASSERT_RETURN(msg->cookie == expected_cookie);
+
+		kdbus_msg_free(msg);
+
+		/*
+		 * owner_b got the broadcast from an unprivileged
+		 * connection.
+		 */
+		ret = kdbus_msg_recv_poll(owner_b, 300, &msg, NULL);
+		ASSERT_RETURN(ret == 0);
+
+		/* confirm the received cookie */
+		ASSERT_RETURN(msg->cookie == expected_cookie);
+
+		kdbus_msg_free(msg);
+
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	close(efd);
+
+	/*
+	 * Test broadcast with two unprivileged connections running
+	 * under different users.
+	 *
+	 * Both connections will fail with -ETIMEDOUT.
+	 */
+
+	ret = test_policy_priv_by_broadcast(env->buspath, NULL,
+					    DROP_OTHER_UNPRIV,
+					    -ETIMEDOUT, -ETIMEDOUT);
+	ASSERT_RETURN(ret == 0);
+
+	/* Drain the broadcasts queued on the privileged connections */
+	ret = kdbus_msg_recv_poll(owner_a, 100, NULL, NULL);
+	ret = kdbus_msg_recv_poll(owner_a, 100, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(owner_a, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	ret = kdbus_msg_recv_poll(owner_b, 100, NULL, NULL);
+	ret = kdbus_msg_recv_poll(owner_b, 100, NULL, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_msg_recv(owner_b, NULL, NULL);
+	ASSERT_RETURN(ret == -EAGAIN);
+
+	/*
+	 * Perform the last tests: allow others to talk to the name
+	 * "com.example.broadcastA". Receiving broadcasts from it
+	 * should now succeed since the TALK policy allows it.
+	 */
+
+	/* KDBUS_POLICY_TALK for everybody (world access) */
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(holder_a,
+				       "com.example.broadcastA",
+				       &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Unprivileged connections are now able to TALK to
+	 * "com.example.broadcastA", so they will receive its
+	 * broadcasts.
+	 */
+	ret = test_policy_priv_by_broadcast(env->buspath, owner_a,
+					    DO_NOT_DROP, 0, 0);
+	ASSERT_RETURN(ret == 0);
+
+	++expected_cookie;
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.broadcastB",
+					 NULL);
+		ASSERT_EXIT(ret >= 0);
+		ret = kdbus_msg_send(unpriv, NULL, expected_cookie,
+				     0, 0, 0, KDBUS_DST_ID_BROADCAST);
+		ASSERT_EXIT(ret == 0);
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	/* owner_a is privileged, so it will get the broadcast now. */
+	ret = kdbus_msg_recv_poll(owner_a, 300, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* confirm the received cookie */
+	ASSERT_RETURN(msg->cookie == expected_cookie);
+
+	kdbus_msg_free(msg);
+
+	/*
+	 * owner_a now releases the name "com.example.broadcastA".
+	 * It should still receive broadcasts since it remains
+	 * privileged and has the right match installed.
+	 *
+	 * An unprivileged connection will own a name and will try
+	 * to signal the privileged connection.
+	 */
+
+	ret = kdbus_name_release(owner_a, "com.example.broadcastA");
+	ASSERT_EXIT(ret >= 0);
+
+	++expected_cookie;
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.broadcastB",
+					 NULL);
+		ASSERT_EXIT(ret >= 0);
+		ret = kdbus_msg_send(unpriv, NULL, expected_cookie,
+				     0, 0, 0, KDBUS_DST_ID_BROADCAST);
+		ASSERT_EXIT(ret == 0);
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	/* owner_a will get the broadcast now. */
+	ret = kdbus_msg_recv_poll(owner_a, 300, &msg, NULL);
+	ASSERT_RETURN(ret == 0);
+
+	/* confirm the received cookie */
+	ASSERT_RETURN(msg->cookie == expected_cookie);
+
+	kdbus_msg_free(msg);
+
+	kdbus_conn_free(owner_a);
+	kdbus_conn_free(owner_b);
+	kdbus_conn_free(holder_a);
+	kdbus_conn_free(holder_b);
+
+	return 0;
+}
+
+static int test_policy_priv(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn_a, *conn_b, *conn, *owner;
+	struct kdbus_policy_access access, *acc;
+	sigset_t sset;
+	size_t num;
+	int ret;
+
+	/*
+	 * Make sure we have CAP_SETUID/SETGID so we can drop privileges
+	 */
+
+	ret = test_is_capable(CAP_SETUID, CAP_SETGID, -1);
+	ASSERT_RETURN(ret >= 0);
+
+	if (!ret)
+		return TEST_SKIP;
+
+	/* make sure that uids and gids are mapped */
+	if (!all_uids_gids_are_mapped())
+		return TEST_SKIP;
+
+	/*
+	 * Setup:
+	 *  conn_a: policy holder for com.example.a
+	 *  conn_b: name holder of com.example.b
+	 */
+
+	signal(SIGUSR1, nosig);
+	sigemptyset(&sset);
+	sigaddset(&sset, SIGUSR1);
+	sigprocmask(SIG_BLOCK, &sset, NULL);
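+	/*
+	 * Keep SIGUSR1 blocked: the unprivileged child forked in the
+	 * policy-holder disconnect test below waits for it with
+	 * sigsuspend() once the parent has dropped its reference.
+	 */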
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	/*
+	 * Before registering any policy holder, make sure that the
+	 * bus is secure by default. This test is necessary; it catches
+	 * several cases where the old D-Bus implementation was vulnerable.
+	 */
+
+	ret = test_priv_before_policy_upload(env);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Make sure unprivileged connections are not able to register
+	 * policy holders.
+	 */
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_GID, ({
+		struct kdbus_conn *holder;
+
+		holder = kdbus_hello_registrar(env->buspath,
+					       "com.example.a", NULL, 0,
+					       KDBUS_HELLO_POLICY_HOLDER);
+		ASSERT_EXIT(holder == NULL && errno == EPERM);
+	}),
+	({ 0; }));
+	ASSERT_RETURN(ret == 0);
+
+
+	/* Register policy holder */
+
+	conn_a = kdbus_hello_registrar(env->buspath, "com.example.a",
+				       NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn_a);
+
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_b);
+
+	ret = kdbus_name_acquire(conn_b, "com.example.b", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure bus-owners can always acquire names.
+	 */
+	ret = kdbus_name_acquire(conn, "com.example.a", NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	kdbus_conn_free(conn);
+
+	/*
+	 * Make sure unprivileged users cannot acquire names with default
+	 * policy assigned.
+	 */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret < 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged users can acquire names if we make them
+	 * world-accessible.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = 0,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	/*
+	 * Make sure unprivileged/normal connections are not able
+	 * to update policies
+	 */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_conn_update_policy(unpriv, "com.example.a",
+					       &access, 1);
+		ASSERT_EXIT(ret == -EOPNOTSUPP);
+	}));
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged users can acquire names if we make them
+	 * gid-accessible. But only if the gid matches.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_GROUP,
+		.id = UNPRIV_GID,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_GROUP,
+		.id = 1,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret < 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged users can acquire names if we make them
+	 * uid-accessible. But only if the uid matches.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = UNPRIV_UID,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = 1,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret < 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged users cannot acquire names if no owner-policy
+	 * matches, even if SEE/TALK policies match.
+	 */
+
+	num = 4;
+	acc = (struct kdbus_policy_access[]){
+		{
+			.type = KDBUS_POLICY_ACCESS_GROUP,
+			.id = UNPRIV_GID,
+			.access = KDBUS_POLICY_SEE,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = UNPRIV_UID,
+			.access = KDBUS_POLICY_TALK,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_WORLD,
+			.id = 0,
+			.access = KDBUS_POLICY_TALK,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_WORLD,
+			.id = 0,
+			.access = KDBUS_POLICY_SEE,
+		},
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", acc, num);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret < 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged users can acquire names if the only matching
+	 * policy is somewhere in the middle.
+	 */
+
+	num = 5;
+	acc = (struct kdbus_policy_access[]){
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 1,
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 2,
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = UNPRIV_UID,
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 3,
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 4,
+			.access = KDBUS_POLICY_OWN,
+		},
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", acc, num);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_name_acquire(unpriv, "com.example.a", NULL);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Clear policies
+	 */
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", NULL, 0);
+	ASSERT_RETURN(ret == 0);
+
+	/*
+	 * Make sure privileged bus users can _always_ talk to others.
+	 */
+
+	conn = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn);
+
+	ret = kdbus_msg_send(conn, "com.example.b", 0xdeadbeef, 0, 0, 0, 0);
+	ASSERT_EXIT(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_b, 300, NULL, NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	kdbus_conn_free(conn);
+
+	/*
+	 * Make sure unprivileged bus users cannot talk by default.
+	 */
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users can talk to equals, even without
+	 * policy.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = UNPRIV_UID,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.c", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		struct kdbus_conn *owner;
+
+		owner = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_RETURN(owner);
+
+		ret = kdbus_name_acquire(owner, "com.example.c", NULL);
+		ASSERT_EXIT(ret >= 0);
+
+		ret = kdbus_msg_send(unpriv, "com.example.c", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+		ret = kdbus_msg_recv_poll(owner, 100, NULL, NULL);
+		ASSERT_EXIT(ret >= 0);
+
+		kdbus_conn_free(owner);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users can talk to privileged users if a
+	 * suitable UID policy is set.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = UNPRIV_UID,
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_b, 100, NULL, NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users can talk to privileged users if a
+	 * suitable GID policy is set.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_GROUP,
+		.id = UNPRIV_GID,
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_b, 100, NULL, NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users can talk to privileged users if a
+	 * suitable WORLD policy is set.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = 0,
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_b, 100, NULL, NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users cannot talk to privileged users if
+	 * no suitable policy is set.
+	 */
+
+	num = 5;
+	acc = (struct kdbus_policy_access[]){
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 0,
+			.access = KDBUS_POLICY_OWN,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 1,
+			.access = KDBUS_POLICY_TALK,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = UNPRIV_UID,
+			.access = KDBUS_POLICY_SEE,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 3,
+			.access = KDBUS_POLICY_TALK,
+		},
+		{
+			.type = KDBUS_POLICY_ACCESS_USER,
+			.id = 4,
+			.access = KDBUS_POLICY_TALK,
+		},
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", acc, num);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure unprivileged bus users can talk to privileged users if a
+	 * suitable OWN policy is installed; OWN implies TALK.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = 0,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	ret = kdbus_msg_recv_poll(conn_b, 100, NULL, NULL);
+	ASSERT_EXIT(ret >= 0);
+
+	/*
+	 * Make sure the TALK cache is reset correctly when policies are
+	 * updated.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = 0,
+		.access = KDBUS_POLICY_TALK,
+	};
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.b", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = RUN_UNPRIVILEGED_CONN(unpriv, env->buspath, ({
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+
+		ret = kdbus_msg_recv_poll(conn_b, 100, NULL, NULL);
+		ASSERT_EXIT(ret >= 0);
+
+		ret = kdbus_conn_update_policy(conn_a, "com.example.b",
+					       NULL, 0);
+		ASSERT_RETURN(ret == 0);
+
+		ret = kdbus_msg_send(unpriv, "com.example.b", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret == -EPERM);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+	/*
+	 * Make sure the TALK cache is reset correctly when policy holders
+	 * disconnect.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_WORLD,
+		.id = 0,
+		.access = KDBUS_POLICY_OWN,
+	};
+
+	conn = kdbus_hello_registrar(env->buspath, "com.example.c",
+				     NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn);
+
+	ret = kdbus_conn_update_policy(conn, "com.example.c", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	owner = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(owner);
+
+	ret = kdbus_name_acquire(owner, "com.example.c", NULL);
+	ASSERT_RETURN(ret >= 0);
+
+	ret = RUN_UNPRIVILEGED(UNPRIV_UID, UNPRIV_GID, ({
+		struct kdbus_conn *unpriv;
+
+		/* wait for the parent to drop its reference to the policy holder */
+		sigemptyset(&sset);
+		ret = sigsuspend(&sset);
+		ASSERT_RETURN(ret == -1 && errno == EINTR);
+
+		unpriv = kdbus_hello(env->buspath, 0, NULL, 0);
+		ASSERT_RETURN(unpriv);
+
+		ret = kdbus_msg_send(unpriv, "com.example.c", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret >= 0);
+
+		ret = kdbus_msg_recv_poll(owner, 100, NULL, NULL);
+		ASSERT_EXIT(ret >= 0);
+
+		/* free policy holder */
+		kdbus_conn_free(conn);
+
+		ret = kdbus_msg_send(unpriv, "com.example.c", 0xdeadbeef, 0, 0,
+				     0, 0);
+		ASSERT_EXIT(ret == -EPERM);
+
+		kdbus_conn_free(unpriv);
+	}), ({
+		/* make sure policy holder is only valid in child */
+		kdbus_conn_free(conn);
+		kill(pid, SIGUSR1);
+	}));
+	ASSERT_RETURN(ret >= 0);
+
+
+	/*
+	 * Finally, test broadcast delivery after policies have been
+	 * uploaded.
+	 */
+
+	ret = test_broadcast_after_policy_upload(env);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_conn_free(owner);
+
+	/*
+	 * cleanup resources
+	 */
+
+	kdbus_conn_free(conn_b);
+	kdbus_conn_free(conn_a);
+
+	return TEST_OK;
+}
+
+int kdbus_test_policy_priv(struct kdbus_test_env *env)
+{
+	pid_t pid;
+	int ret;
+
+	/* make sure to exit() if a child returns from fork() */
+	pid = getpid();
+	ret = test_policy_priv(env);
+	if (pid != getpid())
+		exit(1);
+
+	return ret;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-policy.c linux-4.2-pck/tools/testing/selftests/kdbus/test-policy.c
--- linux-4.2/tools/testing/selftests/kdbus/test-policy.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-policy.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,80 @@
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+int kdbus_test_policy(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn_a, *conn_b;
+	struct kdbus_policy_access access;
+	int ret;
+
+	/* Invalid name */
+	conn_a = kdbus_hello_registrar(env->buspath, ".example.a",
+				       NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn_a == NULL);
+
+	conn_a = kdbus_hello_registrar(env->buspath, "example",
+				       NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn_a == NULL);
+
+	conn_a = kdbus_hello_registrar(env->buspath, "com.example.a",
+				       NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn_a);
+
+	conn_b = kdbus_hello_registrar(env->buspath, "com.example.b",
+				       NULL, 0, KDBUS_HELLO_POLICY_HOLDER);
+	ASSERT_RETURN(conn_b);
+
+	/*
+	 * Verify there cannot be any duplicate entries, except for specific vs.
+	 * wildcard entries.
+	 */
+
+	access = (struct kdbus_policy_access){
+		.type = KDBUS_POLICY_ACCESS_USER,
+		.id = geteuid(),
+		.access = KDBUS_POLICY_SEE,
+	};
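+	/*
+	 * Note that each policy update replaces the holder's previously
+	 * installed entries; this is why conn_b can later take
+	 * "com.example.a" once conn_a has replaced its own set with
+	 * "com.example.*".
+	 */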
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_update_policy(conn_b, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == -EEXIST);
+
+	ret = kdbus_conn_update_policy(conn_b, "com.example.a.*", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.a.*", &access, 1);
+	ASSERT_RETURN(ret == -EEXIST);
+
+	ret = kdbus_conn_update_policy(conn_a, "com.example.*", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_update_policy(conn_b, "com.example.a", &access, 1);
+	ASSERT_RETURN(ret == 0);
+
+	ret = kdbus_conn_update_policy(conn_b, "com.example.*", &access, 1);
+	ASSERT_RETURN(ret == -EEXIST);
+
+	/* Invalid name */
+	ret = kdbus_conn_update_policy(conn_b, ".example.*", &access, 1);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	ret = kdbus_conn_update_policy(conn_b, "example", &access, 1);
+	ASSERT_RETURN(ret == -EINVAL);
+
+	kdbus_conn_free(conn_b);
+	kdbus_conn_free(conn_a);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-sync.c linux-4.2-pck/tools/testing/selftests/kdbus/test-sync.c
--- linux-4.2/tools/testing/selftests/kdbus/test-sync.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-sync.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,369 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/eventfd.h>
+
+#include "kdbus-api.h"
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+static struct kdbus_conn *conn_a, *conn_b;
+static unsigned int cookie = 0xdeadbeef;
+
+static void nop_handler(int sig) {}
+
+static int interrupt_sync(struct kdbus_conn *conn_src,
+			  struct kdbus_conn *conn_dst)
+{
+	pid_t pid;
+	int ret, status;
+	struct kdbus_msg *msg = NULL;
+	struct sigaction sa = {
+		.sa_handler = nop_handler,
+		.sa_flags = SA_NOCLDSTOP|SA_RESTART,
+	};
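+	/*
+	 * The child installs a no-op SIGINT handler with SA_RESTART so
+	 * that the parent's kill() below does not abort its synchronous
+	 * send; the call keeps waiting and must return -ETIMEDOUT.
+	 */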
+
+	cookie++;
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		ret = sigaction(SIGINT, &sa, NULL);
+		ASSERT_EXIT(ret == 0);
+
+		ret = kdbus_msg_send_sync(conn_dst, NULL, cookie,
+					  KDBUS_MSG_EXPECT_REPLY,
+					  100000000ULL, 0, conn_src->id, -1);
+		ASSERT_EXIT(ret == -ETIMEDOUT);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = kdbus_msg_recv_poll(conn_src, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	ret = kill(pid, SIGINT);
+	ASSERT_RETURN_VAL(ret == 0, ret);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	if (WIFSIGNALED(status))
+		return TEST_ERR;
+
+	ret = kdbus_msg_recv_poll(conn_src, 100, NULL, NULL);
+	ASSERT_RETURN(ret == -ETIMEDOUT);
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+static int close_epipe_sync(const char *bus)
+{
+	pid_t pid;
+	int ret, status;
+	struct kdbus_conn *conn_src;
+	struct kdbus_conn *conn_dst;
+	struct kdbus_msg *msg = NULL;
+
+	conn_src = kdbus_hello(bus, 0, NULL, 0);
+	ASSERT_RETURN(conn_src);
+
+	ret = kdbus_add_match_empty(conn_src);
+	ASSERT_RETURN(ret == 0);
+
+	conn_dst = kdbus_hello(bus, 0, NULL, 0);
+	ASSERT_RETURN(conn_dst);
+
+	cookie++;
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		uint64_t dst_id;
+
+		/* close our reference */
+		dst_id = conn_dst->id;
+		kdbus_conn_free(conn_dst);
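+		/*
+		 * The parent still holds its own fd for conn_dst, so the
+		 * connection stays alive; the synchronous send below only
+		 * fails with -EPIPE once the parent destroys conn_dst.
+		 */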
+
+		ret = kdbus_msg_recv_poll(conn_src, 100, &msg, NULL);
+		ASSERT_EXIT(ret == 0 && msg->cookie == cookie);
+		ASSERT_EXIT(msg->src_id == dst_id);
+
+		cookie++;
+		ret = kdbus_msg_send_sync(conn_src, NULL, cookie,
+					  KDBUS_MSG_EXPECT_REPLY,
+					  100000000ULL, 0, dst_id, -1);
+		ASSERT_EXIT(ret == -EPIPE);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = kdbus_msg_send(conn_dst, NULL, cookie, 0, 0, 0,
+			     KDBUS_DST_ID_BROADCAST);
+	ASSERT_RETURN(ret == 0);
+
+	cookie++;
+	ret = kdbus_msg_recv_poll(conn_dst, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	/* destroy connection */
+	kdbus_conn_free(conn_dst);
+	kdbus_conn_free(conn_src);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	if (!WIFEXITED(status))
+		return TEST_ERR;
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+static int cancel_fd_sync(struct kdbus_conn *conn_src,
+			  struct kdbus_conn *conn_dst)
+{
+	pid_t pid;
+	int cancel_fd;
+	int ret, status;
+	uint64_t counter = 1;
+	struct kdbus_msg *msg = NULL;
+
+	cancel_fd = eventfd(0, 0);
+	ASSERT_RETURN_VAL(cancel_fd >= 0, cancel_fd);
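+	/*
+	 * The eventfd is handed to the child as a cancellation fd for its
+	 * synchronous send; once the parent writes to it, the pending call
+	 * returns -ECANCELED.
+	 */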
+
+	cookie++;
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		ret = kdbus_msg_send_sync(conn_dst, NULL, cookie,
+					  KDBUS_MSG_EXPECT_REPLY,
+					  100000000ULL, 0, conn_src->id,
+					  cancel_fd);
+		ASSERT_EXIT(ret == -ECANCELED);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = kdbus_msg_recv_poll(conn_src, 100, &msg, NULL);
+	ASSERT_RETURN(ret == 0 && msg->cookie == cookie);
+
+	kdbus_msg_free(msg);
+
+	ret = write(cancel_fd, &counter, sizeof(counter));
+	ASSERT_RETURN(ret == sizeof(counter));
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	if (WIFSIGNALED(status))
+		return TEST_ERR;
+
+	return (status == EXIT_SUCCESS) ? TEST_OK : TEST_ERR;
+}
+
+static int no_cancel_sync(struct kdbus_conn *conn_src,
+			  struct kdbus_conn *conn_dst)
+{
+	pid_t pid;
+	int cancel_fd;
+	int ret, status;
+	struct kdbus_msg *msg = NULL;
+
+	/* pass eventfd, but never signal it so it shouldn't have any effect */
+
+	cancel_fd = eventfd(0, 0);
+	ASSERT_RETURN_VAL(cancel_fd >= 0, cancel_fd);
+
+	cookie++;
+	pid = fork();
+	ASSERT_RETURN_VAL(pid >= 0, pid);
+
+	if (pid == 0) {
+		ret = kdbus_msg_send_sync(conn_dst, NULL, cookie,
+					  KDBUS_MSG_EXPECT_REPLY,
+					  100000000ULL, 0, conn_src->id,
+					  cancel_fd);
+		ASSERT_EXIT(ret == 0);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = kdbus_msg_recv_poll(conn_src, 100, &msg, NULL);
+	ASSERT_RETURN_VAL(ret == 0 && msg->cookie == cookie, -1);
+
+	kdbus_msg_free(msg);
+
+	ret = kdbus_msg_send_reply(conn_src, cookie, conn_dst->id);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	ret = waitpid(pid, &status, 0);
+	ASSERT_RETURN_VAL(ret >= 0, ret);
+
+	if (WIFSIGNALED(status))
+		return -1;
+
+	return (status == EXIT_SUCCESS) ? 0 : -1;
+}
+
+static void *run_thread_reply(void *data)
+{
+	int ret;
+	unsigned long status = TEST_OK;
+
+	ret = kdbus_msg_recv_poll(conn_a, 3000, NULL, NULL);
+	if (ret < 0)
+		goto exit_thread;
+
+	kdbus_printf("Thread received message, sending reply ...\n");
+
+	/* using an unknown cookie must fail */
+	ret = kdbus_msg_send_reply(conn_a, ~cookie, conn_b->id);
+	if (ret != -EBADSLT) {
+		status = TEST_ERR;
+		goto exit_thread;
+	}
+
+	ret = kdbus_msg_send_reply(conn_a, cookie, conn_b->id);
+	if (ret != 0) {
+		status = TEST_ERR;
+		goto exit_thread;
+	}
+
+exit_thread:
+	/* hand the test status back to pthread_join() in the caller */
+	pthread_exit((void *) status);
+	return NULL;
+}
+
+int kdbus_test_sync_reply(struct kdbus_test_env *env)
+{
+	unsigned long status;
+	pthread_t thread;
+	int ret;
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	pthread_create(&thread, NULL, run_thread_reply, NULL);
+
+	ret = kdbus_msg_send_sync(conn_b, NULL, cookie,
+				  KDBUS_MSG_EXPECT_REPLY,
+				  5000000000ULL, 0, conn_a->id, -1);
+
+	pthread_join(thread, (void *) &status);
+	ASSERT_RETURN(status == 0);
+	ASSERT_RETURN(ret == 0);
+
+	ret = interrupt_sync(conn_a, conn_b);
+	ASSERT_RETURN(ret == 0);
+
+	ret = close_epipe_sync(env->buspath);
+	ASSERT_RETURN(ret == 0);
+
+	ret = cancel_fd_sync(conn_a, conn_b);
+	ASSERT_RETURN(ret == 0);
+
+	ret = no_cancel_sync(conn_a, conn_b);
+	ASSERT_RETURN(ret == 0);
+
+	kdbus_printf("-- closing bus connections\n");
+
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	return TEST_OK;
+}
+
+#define BYEBYE_ME ((void*)0L)
+#define BYEBYE_THEM ((void*)1L)
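+/*
+ * BYEBYE_ME disconnects conn_b, the connection synchronously waiting for a
+ * reply (woken with -ECONNRESET); BYEBYE_THEM disconnects conn_a, the reply
+ * target (the waiter gets -EPIPE as the reply can no longer be sent).
+ */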
+
+static void *run_thread_byebye(void *data)
+{
+	struct kdbus_cmd cmd_byebye = { .size = sizeof(cmd_byebye) };
+	int ret;
+
+	ret = kdbus_msg_recv_poll(conn_a, 3000, NULL, NULL);
+	if (ret == 0) {
+		kdbus_printf("Thread received message, invoking BYEBYE ...\n");
+		kdbus_msg_recv(conn_a, NULL, NULL);
+		if (data == BYEBYE_ME)
+			kdbus_cmd_byebye(conn_b->fd, &cmd_byebye);
+		else if (data == BYEBYE_THEM)
+			kdbus_cmd_byebye(conn_a->fd, &cmd_byebye);
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+int kdbus_test_sync_byebye(struct kdbus_test_env *env)
+{
+	pthread_t thread;
+	int ret;
+
+	/*
+	 * This sends a synchronous message to a thread, which waits until it
+	 * has received the message and then invokes BYEBYE on the *ORIGINAL*
+	 * connection. That is, on the same connection that synchronously waits
+	 * for a reply.
+	 * This should properly wake the connection up and cause ECONNRESET as
+	 * the connection is disconnected now.
+	 *
+	 * The second time, we do the same but invoke BYEBYE on the *TARGET*
+	 * connection. This should also wake up the synchronous sender as the
+	 * reply cannot be sent by a disconnected target.
+	 */
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	pthread_create(&thread, NULL, run_thread_byebye, BYEBYE_ME);
+
+	ret = kdbus_msg_send_sync(conn_b, NULL, cookie,
+				  KDBUS_MSG_EXPECT_REPLY,
+				  5000000000ULL, 0, conn_a->id, -1);
+
+	ASSERT_RETURN(ret == -ECONNRESET);
+
+	pthread_join(thread, NULL);
+
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	pthread_create(&thread, NULL, run_thread_byebye, BYEBYE_THEM);
+
+	ret = kdbus_msg_send_sync(conn_b, NULL, cookie,
+				  KDBUS_MSG_EXPECT_REPLY,
+				  5000000000ULL, 0, conn_a->id, -1);
+
+	ASSERT_RETURN(ret == -EPIPE);
+
+	pthread_join(thread, NULL);
+
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	return TEST_OK;
+}
diff -Nur linux-4.2/tools/testing/selftests/kdbus/test-timeout.c linux-4.2-pck/tools/testing/selftests/kdbus/test-timeout.c
--- linux-4.2/tools/testing/selftests/kdbus/test-timeout.c	1969-12-31 21:00:00.000000000 -0300
+++ linux-4.2-pck/tools/testing/selftests/kdbus/test-timeout.c	2015-09-08 00:48:22.248362932 -0300
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+
+#include "kdbus-api.h"
+#include "kdbus-test.h"
+#include "kdbus-util.h"
+#include "kdbus-enum.h"
+
+int timeout_msg_recv(struct kdbus_conn *conn, uint64_t *expected)
+{
+	struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+	struct kdbus_msg *msg;
+	int ret;
+
+	ret = kdbus_cmd_recv(conn->fd, &recv);
+	if (ret < 0) {
+		kdbus_printf("error receiving message: %d (%m)\n", ret);
+		return ret;
+	}
+
+	msg = (struct kdbus_msg *)(conn->buf + recv.msg.offset);
+
+	ASSERT_RETURN_VAL(msg->payload_type == KDBUS_PAYLOAD_KERNEL, -EINVAL);
+	ASSERT_RETURN_VAL(msg->src_id == KDBUS_SRC_ID_KERNEL, -EINVAL);
+	ASSERT_RETURN_VAL(msg->dst_id == conn->id, -EINVAL);
+
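+	/*
+	 * The kernel's timeout notification carries the cookie of the
+	 * expired request in cookie_reply; clear the corresponding bit in
+	 * the expectation mask (only the low six bits index the mask).
+	 */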
+	*expected &= ~(1ULL << (msg->cookie_reply & 63));
+	kdbus_printf("Got message timeout for cookie %llu\n",
+		     msg->cookie_reply);
+
+	ret = kdbus_free(conn, recv.msg.offset);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int kdbus_test_timeout(struct kdbus_test_env *env)
+{
+	struct kdbus_conn *conn_a, *conn_b;
+	struct pollfd fd;
+	int ret, i, n_msgs = 4;
+	uint64_t expected = 0;
+	uint64_t cookie = 0xdeadbeef;
+
+	conn_a = kdbus_hello(env->buspath, 0, NULL, 0);
+	conn_b = kdbus_hello(env->buspath, 0, NULL, 0);
+	ASSERT_RETURN(conn_a && conn_b);
+
+	fd.fd = conn_b->fd;
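+	/*
+	 * Poll the sending connection; the kernel queues the timeout
+	 * notifications there (timeout_msg_recv() checks dst_id against
+	 * the polled connection's id).
+	 */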
+
+	/*
+	 * Send messages that expect a reply (within 100 to 400 msec),
+	 * but never answer them.
+	 */
+	for (i = 0; i < n_msgs; i++, cookie++) {
+		kdbus_printf("Sending message with cookie %llu ...\n",
+			     (unsigned long long)cookie);
+		ASSERT_RETURN(kdbus_msg_send(conn_b, NULL, cookie,
+			      KDBUS_MSG_EXPECT_REPLY,
+			      (i + 1) * 100ULL * 1000000ULL, 0,
+			      conn_a->id) == 0);
+		expected |= 1ULL << (cookie & 63);
+	}
+
+	for (;;) {
+		fd.events = POLLIN | POLLPRI | POLLHUP;
+		fd.revents = 0;
+
+		ret = poll(&fd, 1, (n_msgs + 1) * 100);
+		if (ret == 0)
+			kdbus_printf("--- timeout\n");
+		if (ret <= 0)
+			break;
+
+		if (fd.revents & POLLIN)
+			ASSERT_RETURN(!timeout_msg_recv(conn_b, &expected));
+
+		if (expected == 0)
+			break;
+	}
+
+	ASSERT_RETURN(expected == 0);
+
+	kdbus_conn_free(conn_a);
+	kdbus_conn_free(conn_b);
+
+	return TEST_OK;
+}