\documentclass[fleqn]{article}
\usepackage[utf8]{inputenc}
\usepackage[round,longnamesfirst]{natbib}
\usepackage{graphicx,keyval,a4wide,thumbpdf,makeidx,color,colordvi}
\usepackage{amsfonts,hyperref}

%% Semantic markup macros used in the running text.
% The name of the R system, set in sans serif.
\newcommand{\R}{\textsf{R}}
% Package names: normal font switched to the bold (b) series.
\newcommand{\pkg}[1]{%
  {\normalfont\fontseries{b}\selectfont #1}%
}
% Text wrapped in single resp. double quotation marks.
\newcommand{\sQuote}[1]{`{#1}'}
\newcommand{\dQuote}[1]{``{#1}''}
% File names: sans serif inside single quotes.
\newcommand{\file}[1]{\sQuote{\textsf{#1}}}
% \data sets its argument in typewriter type, \var in italics.
\newcommand{\data}[1]{\texttt{#1}}
\newcommand{\var}[1]{\textit{#1}}
% \class and \proglang both set their argument in sans serif.
\newcommand{\class}[1]{\textsf{#1}}
\newcommand{\proglang}[1]{\textsf{#1}}
%% \code without `-' ligatures
%%
%% \code{...} typesets its argument in typewriter type with hyphenation
%% switched off, while still permitting a line break after each `-'.
%%
%% The helper \Rd@code has an `@' in its name, so `@' must be a letter
%% while it is defined and while \code (which refers to it) is defined.
%% Without \makeatletter TeX would define a macro \Rd delimited by the
%% characters `@code' instead, which only works by accident.
\makeatletter
% Disable hyphenation for the current font (\hyphenchar = -1) and schedule
% \restorehyphenation to run once the current group has been closed.
\def\nohyphenation{\hyphenchar\font=-1 \aftergroup\restorehyphenation}
% Set the hyphenation character of the current font back to `-'.
\def\restorehyphenation{\hyphenchar\font=`-}
% \code opens a group, makes `-' an active character that expands to
% \codedash, and hands over to \Rd@code, which reads the argument.
% The definition is made inside a group in which `-' is already active so
% that the `-' in \let-\codedash is stored as an active character token.
{\catcode`\-=\active%
  \global\def\code{\bgroup%
    \catcode`\-=\active \let-\codedash%
    \Rd@code}}
% A hyphen followed by an empty discretionary, i.e. an allowed break point.
\def\codedash{-\discretionary{}{}{}}
% Typeset the argument in \texttt and close the group opened by \code.
\def\Rd@code#1{\texttt{\nohyphenation#1}\egroup}
\makeatother
% Function name followed by `()', all set with \code.
\newcommand{\codefun}[1]{\code{#1()}}
% As \codefun resp. \code, plus an index entry for the name in \texttt.
\newcommand{\codefunind}[1]{\codefun{#1}\index{\texttt{#1}}}
\newcommand{\codeind}[1]{\code{#1}\index{\texttt{#1}}}

% Sweave option applying to the code chunks of this document.
\SweaveOpts{strip.white=true}

% Default width of included graphics: half the text width.  The key is set
% at the start of the document body rather than immediately.
\AtBeginDocument{\setkeys{Gin}{width=0.5\textwidth}}

% Named colours given as RGB triples: a dark blue and a dark red.
\definecolor{Blue}{rgb}{0,0,0.8}
\definecolor{Red}{rgb}{0.7,0,0}

% \date{2018-04-07}
\title{Knowledge Space Theory Input and Output}
\author{Cord Hockemeyer}
% The four Vignette* comment lines below carry the index entry, dependencies,
% keywords and package name of this vignette.  NOTE(review): presumably they
% are read by R's vignette tooling, so keep them byte-identical.
%\VignetteIndexEntry{KSTIO}
%\VignetteDepends{kstIO}
%\VignetteKeywords{knowledge space, read/write}
%\VignettePackage{kstIO}

% Enable writing of index entries; the index itself is printed by
% \printindex at the end of the document.
\makeindex{}

% Relax the line-breaking tolerances for the whole document.
\sloppy{}

\begin{document}
\maketitle

\begin{abstract}
\noindent This document explains basic read and write operations for knowledge 
structures and knowledge spaces available in \R{} through the \pkg{kstIO} 
package.
\end{abstract}

\tableofcontents

<<echo=FALSE>>=
# Hidden setup chunk: output width and the packages used below.
options(width = 80)
library("kstIO")
library("kst")
# Switch to the session's temporary directory.  setwd() returns the
# directory that was current before the change; it is kept in `wd`.
wd <- setwd(tempdir())
@

\section{Introduction}
\label{sec:introduction}
Knowledge Space Theory \citep{doignon:b99} is a set- and order-theoretical
framework, which proposes mathematical formalisms to operationalize
knowledge structures in a particular domain. There exist several \R{}
packages for knowledge space theory, namely \pkg{kst}, \pkg{kstMatrix}, \pkg{pks}, and
\pkg{DAKS} which use different forms of representations for knowledge
spaces and structures. The \pkg{kstIO} package provides functions for
reading and writing those structures from/to files.

\section{File Formats}\label{sec:fileformats}
Over time, different file formats have evolved in the various research
groups working with knowledge space theory.

  \subsection{Matrix Format}
    Probably the simplest and most direct approach
    is to store the information in a binary ASCII matrix where a \dQuote{1}
    in row $i$ and column $j$ means that item $j$ is an element of state/response
    pattern $i$.
    
    There is no separating character between the columns, 
    and there should be no trailing whitespace at the end of the line.
    The last line of the matrix must carry an EndOfLine---in most editors
    (except vi) this means an empty line after the matrix.
  
  \subsection{KST Tools Format}
    This format \citep{hockemeyer:tr01a} extends the 
    matrix format by two preceding header lines containing the number of 
    items and the number of states/response patterns, respectively.
  
  \subsection{SRBT Tools Format}
    This format \citep{poetzi:tr01b} extends 
    the KST tools format by yet another preceding header line with format and 
    content metadata. This new header line has the format
\begin{verbatim}
#SRBT v2.0 <struct> ASCII <comment>
\end{verbatim}
    where \verb|<struct>| specifies the type of data stored in the
    file and \verb|<comment>| is an optional arbitrary comment.

    The following data types are supported by the respective
    \pkg{kstIO} functions:
    \begin{itemize}
      \item basis
      \item data
      \item space
      \item structure
      \item relation
    \end{itemize}
  \subsection{CSV Format}
    In CSV format, there is a separating comma between the columns. The
    matrix is preceded by a header line containing the item IDs.
    
\section{Specific File Types}  
  \subsection{Base Files}
    For base files, some special rules apply. They are available only
    in KST, SRBT tools, and CSV format. Their matrix part differs from the other files 
    in that it contains \dQuote{0}, \dQuote{1}, and \dQuote{2}. A \dQuote{1} means that the state
    is minimal for the item and a \dQuote{2} means that it is not (but contains
    the item). A \dQuote{0} stands (as always) for the state not containing
    the item.

    For \code{kbase} files, the encoding information \code{"ASCII"} is 
    missing because \code{kbase} files are always in ASCII format.

    \textbf{Note:} While the respective functions in \pkg{kst} and 
    \pkg{kstIO} use the term \emph{base} in their names, the \verb|<struct>|
    term in the SRBT header line is \emph{basis}.
    
  \subsection{Surmise Function Files}
    In surmise function files, the matrix is preceded by an additional column
    containing the ID of the item for which the respective matrix row is a clause.
  
  \subsection{(Surmise) Relation Files}
    Also for relation files, some special rules apply. As relation file
    formats were never defined in KST tools format, these files are
    available in SRBT tools, matrix, and CSV format only. Like for \code{base} files,
    the encoding information \code{"ASCII"} is missing because \code{relation}
    files are always in ASCII format.

  \subsection{Example}
  Below, you see an example of a small knowledge structure file in
  SRBT format.
\begin{verbatim}
#SRBT v2.0 structure ASCII
3
5
000
100
110
101
111
\end{verbatim}

  \subsection{Binary File Formats}
  The KST and SRBT Tools User Manuals \citep{hockemeyer:tr01a,poetzi:tr01b}
  also define binary file formats. These formats are not supported
  by the \pkg{kstIO} package.

\section{Output functions}
There are five output functions in the \pkg{kstIO} package.
\begin{itemize}
\item \codefunind{write\_kbase}
\item \codefunind{write\_kdata}
\item \codefunind{write\_kspace}
\item \codefunind{write\_kstructure}
\item \codefunind{write\_surmiserelation}
\end{itemize}
These functions have the same calling scheme
\begin{verbatim}
write_XXX(x, filename, format="SRBT")
\end{verbatim}
where \code{x} denotes the data structure to be written, \code{filename}
the name of the file to be created, and \code{format}
the file format (\code{"SRBT"}, \code{"KST"}, or \code{"matrix"}, as
described in Section~\ref{sec:fileformats} above).
The knowledge structure
or knowledge space can be in set-based format (classes \code{kspace} or 
\code{kstructure}) or in matrix format. Please note that for bases,
only the \code{SRBT} and \code{KST} formats are valid.
<<write-functions>>=
# Obtain data from the pks package
data(DoignonFalmagne7)
# Build the knowledge space step by step, then derive base and relation
states <- as.pattern(DoignonFalmagne7$K, as.set = TRUE)
space <- kspace(kstructure(states))
basis <- kbase(space)
responses <- as.binmat(DoignonFalmagne7$N.R, uniq = FALSE)
surmise <- as.relation(space)
space
basis
head(responses)
# Write data to files
write_kstructure(space, "DF7.struct")
write_kspace(space, "DF7.space", format = "matrix")
write_kbase(basis, "DF7.bas", format = "KST")
write_kdata(responses, "DF7.data", format = "SRBT")
write_surmiserelation(surmise, "DF7.rel")
@
The resulting base file, for example, looks like the following:
<<show_example-file>>=
# Print the file DF7.bas line by line
writeLines(readLines("DF7.bas"))
@

\section{Input Functions}
There are six input functions in the \pkg{kstIO} package.
\begin{itemize}
\item \codefunind{read\_kbase}
\item \codefunind{read\_kdata}
\item \codefunind{read\_kfamset}
\item \codefunind{read\_kspace}
\item \codefunind{read\_kstructure}
\item \codefunind{read\_surmiserelation}
\end{itemize}
These functions have a similar calling scheme. For bases, data, famsets, and knowledge structures
it is
\begin{verbatim}
d <- read_kXXX(filename, format="SRBT")
\end{verbatim}
where \code{filename} denotes the file to be read and \code{format} 
the file format: \code{"SRBT"}, \code{"KST"}, or \code{"matrix"} as
described in Section~\ref{sec:fileformats} above, or \code{"auto"}
(default) for automatic format detection. Please note that automatic
format detection works slightly heuristically and therefore might
err between \code{"KST"} and \code{"matrix"} formats under rare
circumstances.

For surmise relations and knowledge spaces, there is an additional (optional) parameter:
\begin{verbatim}
d <- read_YYYY(filename, format="SRBT", close=FALSE)
\end{verbatim}

The return values depend on the type of file to be read: for 
\codefun{read\_kfamset}, \codefun{read\_kspace}, \codefun{read\_kstructure}, and
\codefun{read\_kbase}, it is a list 
containing two elements, \code{matrix} and \code{sets} which contain 
the read knowledge structure/space/base as a binary matrix and in set-based 
form (i.e.\ as an object of class \code{kspace}, \code{kstructure}, or
\code{kbase}), respectively. For \codefun{read\_kdata}, a binary matrix 
is returned. For \codefun{read\_surmiserelation}, a list with two elements, 
\code{relation} and \code{matrix}, is returned, which contain the surmise relation
and its incidence matrix, respectively.

If \code{close} is \code{TRUE}, the respective structure is closed, i.\,e.\ in
case of a knowledge space, it is closed under union, and in case of a surmise
relation, it is closed under reflexivity and transitivity.

<<read-functions>>=
# Read the data files stored before
# No format argument is given here, so the function's default applies
read_kfamset("DF7.space")
# File format stated explicitly
read_kstructure("DF7.struct", format="SRBT")
read_kspace("DF7.space", format="matrix")
# File format left to automatic detection
read_kbase("DF7.bas", format="auto")
# Show only the head of the data that was read
head(read_kdata("DF7.data"))
read_surmiserelation("DF7.rel")
@
<<echo=FALSE>>=
# Hidden teardown chunk: go back to the working directory saved in `wd`
# by the setup chunk at the start of the document.
setwd(wd)
@

% Bibliography, set in \small inside a group so that the size change stays
% local: abbrvnat style, with `kst' given as the bibliography database.
{\small
  \bibliographystyle{abbrvnat}
  \bibliography{kst}
}

% Print the index collected from the \index entries.
\printindex{}

\end{document}