% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/comparators.R
\name{cmp_identical}
\alias{cmp_identical}
\alias{cmp_jarowinkler}
\alias{jaro_winkler}
\alias{cmp_lcs}
\alias{lcs}
\alias{cmp_jaccard}
\alias{jaccard}
\title{Comparison functions}
\usage{
cmp_identical()

cmp_jarowinkler(threshold = 0.95)

jaro_winkler(threshold = 0.8)

cmp_lcs(threshold = 0.8)

lcs(threshold = 0.8)

cmp_jaccard(threshold = 0.8)

jaccard(threshold = 0.8)
}
\arguments{
\item{threshold}{threshold to apply to the similarity score (Jaro-Winkler,
longest common subsequence or Jaccard, depending on the function) when
creating a binary result.}
}
\value{
The functions return a comparison function (see details).
}
\description{
Comparison functions
}
\details{
A comparison function should accept two arguments: both vectors. When the 
function is called with both arguments it should compare the elements in the 
first vector to those in the second. When called in this way, both vectors
have the same length. What the function should return depends on the methods
used to score the pairs. Usually the comparison functions return a similarity
score with a value of 0 indicating complete difference and a value > 0 
indicating similarity (often a value of 1 will indicate perfect similarity). 

Some methods, such as \code{\link{problink_em}}, can handle similarity
scores, but also need binary values (\code{0}/\code{FALSE} = complete 
dissimilarity; \code{1}/\code{TRUE} = complete similarity). In order to
allow for this, the comparison function can also be called with one argument.

When the comparison function is called with one argument, it is passed the result of
a previous comparison. The function should translate that result to a binary 
(\code{TRUE}/\code{FALSE} or \code{1}/\code{0}) result. The result should 
not contain missing values. 

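The \code{cmp_jarowinkler}, \code{cmp_lcs} and \code{cmp_jaccard} functions
(and their deprecated counterparts \code{jaro_winkler}, \code{lcs} and
\code{jaccard}) use the corresponding methods from \code{\link{stringdist}},
except that the result is transformed from a distance to a similarity score.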
}
\section{Warning}{

The functions \code{identical}, \code{jaro_winkler}, \code{lcs} and
\code{jaccard} are deprecated and will be removed in future versions of the
package. Instead use the functions \code{cmp_identical},
\code{cmp_jarowinkler}, \code{cmp_lcs} and \code{cmp_jaccard}.
}

\examples{
cmp <- cmp_identical()
x <- cmp(c("john", "mary", "susan", "jack"), 
         c("johan", "mary", "susanna", NA))
# Applying the comparison function to the result of the comparison results 
# in a logical result, with NA's and values of FALSE set to FALSE
cmp(x)

cmp <- cmp_jarowinkler(0.95)
x <- cmp(c("john", "mary", "susan", "jack"), 
         c("johan", "mary", "susanna", NA))
# Applying the comparison function to the result of the comparison results 
# in a logical result, with NA's and values below the threshold set to FALSE
cmp(x)

\dontshow{gc()}

}
