#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
    range = xrange

from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap, \
    conv_pickle_state
from .. stats import label_self_loops
from .. spectral import adjacency
from .. generation import condensation_graph
import random
from numpy import *
import numpy
from scipy.optimize import fsolve, fminbound
import scipy.special
from collections import defaultdict
import copy
import heapq

from .. dl_import import dl_import
dl_import("from . import libgraph_tool_community as libcommunity")

# Module-wide switch for the extra consistency assertions; it is written by
# set_test() and read back through _bm_test().
__test__ = False

def set_test(test):
    """Set the module-level ``__test__`` flag to ``test``.

    The flag is read back by :func:`_bm_test`, which guards the extra
    consistency assertions in this module.
    """
    globals()["__test__"] = test

def _bm_test():
    """Return the current value of the module-level ``__test__`` flag."""
    # Reading a module global needs no ``global`` declaration.
    return __test__

def get_block_graph(g, B, b, vcount, ecount):
    """Build the block graph of ``g`` for the partition ``b``.

    The graph is obtained from :func:`condensation_graph` (self-loops kept),
    using ``vcount`` and ``ecount`` as vertex and edge weights. The resulting
    counts are stored in the internal ``"count"`` vertex and edge properties
    of the returned graph, and vertices are added until it has ``B``
    vertices.
    """
    condensed = condensation_graph(g, b, vweight=vcount, eweight=ecount,
                                   self_loops=True)
    cg, br, block_vcount, block_ecount = condensed[:4]

    cg.vp["count"] = block_vcount
    cg.ep["count"] = block_ecount

    # Copy with the vertices ordered by ``br`` -- presumably so that the
    # vertex index coincides with the block label (TODO confirm).
    cg = Graph(cg, vorder=br)

    # Pad with vertices for the blocks that are empty in ``b``.
    missing = B - cg.num_vertices()
    cg.add_vertex(missing)
    return cg

class BlockState(object):
    r"""This class encapsulates the block state of a given graph.

    This must be instantiated and used by functions such as :func:`mcmc_sweep`.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be modelled.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge multiplicities (for multigraphs or block graphs).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex multiplicities (for block graphs).
    b : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Initial block labels on the vertices. If not supplied, it will be
        randomly sampled.
    B : ``int`` (optional, default: ``None``)
        Number of blocks. If not supplied it will be either obtained from the
        parameter ``b``, or set to the maximum possible value according to the
        minimum description length.
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be assumed, otherwise the traditional variant will be used.
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this number, a sparse representation of
        the block graph is used, which is slightly less efficient, but uses less
        memory.
    """

    def __init__(self, g, eweight=None, vweight=None, b=None,
                 B=None, clabel=None, deg_corr=True,
                 max_BE=1000, **kwargs):
        """Set up the weights, the partition, the block graph and the caches.

        Besides the documented parameters, two keyword arguments are read from
        ``kwargs``: ``force_weighted`` (treat the graph as weighted even if no
        edge weight exceeds one) and ``ignore_degrees`` (a vertex property
        map; a new ``bool`` map initialized to ``False`` is used if absent).

        Raises :exc:`ValueError` if the largest label in ``b`` is not smaller
        than ``B``.
        """

        # initialize weights to unity, if necessary
        if eweight is None:
            eweight = g.new_edge_property("int")
            eweight.fa = 1
        elif eweight.value_type() != "int32_t":
            # work on an int32_t copy, leaving the caller's map untouched
            eweight = eweight.copy(value_type="int32_t")
        if vweight is None:
            vweight = g.new_vertex_property("int")
            vweight.fa = 1
        elif vweight.value_type() != "int32_t":
            vweight = vweight.copy(value_type="int32_t")
        self.eweight = g.own_property(eweight)
        self.vweight = g.own_property(vweight)

        # the graph counts as weighted if any edge weight exceeds one, or if
        # the caller forces it
        self.is_weighted = False
        if ((g.num_edges() > 0 and self.eweight.fa.max() > 1) or
            kwargs.get("force_weighted", False)):
            self.is_weighted = True

        # configure the main graph and block model parameters
        self.g = g

        # totals are weighted sums, not plain edge/vertex counts
        self.E = int(self.eweight.fa.sum())
        self.N = int(self.vweight.fa.sum())

        self.deg_corr = deg_corr

        # ensure we have at most as many blocks as nodes
        # (the same clamp is applied again in the ``b is None`` branch below)
        if B is not None and b is None:
            B = min(B, self.g.num_vertices())

        if b is None:
            # create a random partition into B blocks.
            if B is None:
                B = get_max_B(self.N, self.E, directed=g.is_directed())
            B = min(B, self.g.num_vertices())
            # NOTE: ``random`` is numpy.random here, since ``from numpy import *``
            # comes after ``import random`` at the top of the file.
            ba = random.randint(0, B, self.g.num_vertices())
            ba[:B] = arange(B)        # avoid empty blocks
            if B < self.g.num_vertices():
                random.shuffle(ba)
            b = g.new_vertex_property("int")
            b.fa = ba
            self.b = b
        else:
            # if a partition is available, we will incorporate it.
            if isinstance(b, numpy.ndarray):
                self.b = g.new_vertex_property("int")
                self.b.fa = b
            else:
                # a property map is copied, so the caller's map is not modified
                self.b = b = g.own_property(b.copy(value_type="int"))
            if B is None:
                B = int(self.b.fa.max()) + 1

        # if B > self.N:
        #     raise ValueError("B > N!")

        if self.b.fa.max() >= B:
            raise ValueError("Maximum value of b is larger or equal to B! (%d vs %d)" % (self.b.fa.max(), B))

        # Construct block-graph
        self.bg = get_block_graph(g, B, self.b, self.vweight, self.eweight)
        self.bg.set_fast_edge_removal()

        # edge counts between blocks and block sizes, as computed by
        # get_block_graph()
        self.mrs = self.bg.ep["count"]
        self.wr = self.bg.vp["count"]

        # keep the maps as attributes only; drop them from the block graph's
        # internal property dictionaries
        del self.bg.ep["count"]
        del self.bg.vp["count"]

        # weighted out-degree of each block
        self.mrp = self.bg.degree_property_map("out", weight=self.mrs)

        if g.is_directed():
            # weighted in-degree of each block
            self.mrm = self.bg.degree_property_map("in", weight=self.mrs)
        else:
            # undirected: the same map object serves as both
            self.mrm = self.mrp

        self.vertices = libcommunity.get_vector(B)
        self.vertices.a = arange(B)
        self.B = B

        if clabel is not None:
            if isinstance(clabel, PropertyMap):
                self.clabel = self.g.own_property(clabel.copy("int"))
            else:
                # anything else is assigned to the filtered array view as-is
                self.clabel = self.g.new_vertex_property("int")
                self.clabel.fa = clabel
        else:
            # default-initialized map -- presumably all zeros, i.e. a single
            # constraint class (TODO confirm)
            self.clabel = self.g.new_vertex_property("int")

        if max_BE is None:
            max_BE = 1000
        self.max_BE = max_BE

        self.overlap = False
        self.ignore_degrees = kwargs.get("ignore_degrees", None)
        if self.ignore_degrees is None:
            self.ignore_degrees = g.new_vertex_property("bool", False)

        # start with empty caches
        self.clear_cache()

    def clear_cache(self):
        """Reset every cached helper structure to its empty state.

        The lazily built objects are set to ``None``, fresh empty statistics
        objects are created, and the ``edges_dl`` flag is cleared.
        """
        # used by mcmc_sweep()
        for attr in ("egroups", "nsampler", "sweep_vertices", "block_list"):
            setattr(self, attr, None)
        self.overlap_stats = libcommunity.overlap_stats()
        self.partition_stats = libcommunity.partition_stats()
        self.edges_dl = False
        self.emat = None

    def __repr__(self):
        return "<BlockState object with %d blocks,%s for graph %s, at 0x%x>" % \
            (self.B, " degree corrected," if self.deg_corr else "", str(self.g),
             id(self))


    def __init_partition_stats(self, empty=True, edges_dl=False):
        """(Re)create ``self.partition_stats`` and record ``edges_dl``.

        With ``empty`` true, a blank statistics object is installed; otherwise
        it is initialized from the current graph, partition and weights.
        ``self.edges_dl`` is overwritten with ``edges_dl`` in both cases.
        """
        self.edges_dl = edges_dl
        if empty:
            self.partition_stats = libcommunity.partition_stats()
            return

        self.partition_stats = libcommunity.init_partition_stats(
            self.g._Graph__graph,
            _prop("v", self.g, self.b),
            _prop("e", self.g, self.eweight),
            self.N, self.B,
            edges_dl,
            _prop("v", self.g, self.ignore_degrees))


    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memo):
        g = self.g.copy()
        eweight = g.own_property(self.eweight.copy())
        vweight = g.own_property(self.vweight.copy())
        clabel = g.own_property(self.clabel.copy())
        b = g.own_property(self.b.copy())
        return self.copy(g=g, eweight=eweight, vweight=vweight, b=b,
                         clabel=clabel)

    def copy(self, g=None, eweight=None, vweight=None, b=None, B=None,
             deg_corr=None, clabel=None, overlap=False, **kwargs):
        r"""Copies the block state. The parameters override the state properties,
        and have the same meaning as in the constructor. If ``overlap=True`` an
        instance of :class:`~graph_tool.community.OverlapBlockState` is
        returned. This is by default a shallow copy.

        If the resulting partition is inconsistent with the constraint labels,
        and ``fix_clabel`` (read from ``kwargs``, default ``True``) is set, the
        blocks are split according to the labels and the state is copied once
        more. A :exc:`RuntimeError` is raised if that second copy is still
        inconsistent.
        """

        # ``B`` resolution, in both branches: an explicit ``B`` wins; otherwise
        # self.B is kept only when the partition is not replaced, and ``None``
        # is passed so that the constructor derives it from the new ``b``.
        if not overlap:
            # the block labels are copied here, the other maps are shared
            state = BlockState(self.g if g is None else g,
                               eweight=self.eweight if eweight is None else eweight,
                               vweight=self.vweight if vweight is None else vweight,
                               b=self.b.copy() if b is None else b,
                               B=(self.B if b is None else None) if B is None else B,
                               clabel=self.clabel if clabel is None else clabel,
                               deg_corr=self.deg_corr if deg_corr is None else deg_corr,
                               max_BE=self.max_BE,
                               ignore_degrees=kwargs.pop("ignore_degrees", self.ignore_degrees),
                               **kwargs)
        else:
            # NOTE: eweight, vweight and ignore_degrees are not forwarded in
            # this branch, and self.b is passed without being copied.
            state = OverlapBlockState(self.g if g is None else g,
                                      b=b if b is not None else self.b,
                                      B=(self.B if b is None else None) if B is None else B,
                                      clabel=self.clabel if clabel is None else clabel,
                                      deg_corr=self.deg_corr if deg_corr is None else deg_corr,
                                      max_BE=self.max_BE, **kwargs)

        # ``fix_clabel`` is read with get(), not pop(), so when present it has
        # already been forwarded to the constructor above through **kwargs.
        if not state.__check_clabel() and kwargs.get("fix_clabel", True):
            # give every (block, clabel) pair its own label, then relabel with
            # continuous_map() (defined elsewhere in this module)
            b = state.b.fa + state.clabel.fa * state.B
            continuous_map(b)
            # fix_clabel=False keeps the nested copy from recursing further
            state = state.copy(b=b, fix_clabel=False)
            if not state.__check_clabel():
                raise RuntimeError("Inconsistent clabel after copy!")

        return state


    def __getstate__(self):
        state = dict(g=self.g,
                     eweight=self.eweight,
                     vweight=self.vweight,
                     b=self.b,
                     B=self.B,
                     clabel=self.clabel,
                     deg_corr=self.deg_corr,
                     max_BE=self.max_BE,
                     ignore_degrees=self.ignore_degrees)
        return state

    def __setstate__(self, state):
        conv_pickle_state(state)
        self.__init__(**state)
        return state

    def get_block_state(self, b=None, vweight=False, deg_corr=False,
                        overlap=False, **kwargs):
        r"""Returns a :class:`~graph_tool.community.BlockState` corresponding to
        the block graph, together with a copy of the current block labels.

        The block graph is used as the graph, and the block edge counts as its
        edge weights. If ``vweight`` is true, the block sizes are used as
        vertex weights. If ``b`` is not given, every block starts in its own
        group. If ``overlap=True``, the state is converted with
        ``copy(overlap=True)``. Additional keyword arguments are accepted but
        not used.

        Returns
        -------
        state : :class:`~graph_tool.community.BlockState`
            The state of the block graph.
        n_map : :class:`~graph_tool.PropertyMap`
            A copy of the block labels of this state.
        """

        block_weights = self.wr if vweight else None
        if b is None:
            initial_b = self.bg.vertex_index.copy("int")
        else:
            initial_b = b

        state = BlockState(self.bg,
                           eweight=self.mrs,
                           vweight=block_weights,
                           b=initial_b,
                           clabel=self.get_bclabel(),
                           deg_corr=deg_corr,
                           max_BE=self.max_BE)
        if overlap:
            state = state.copy(overlap=True)
        return state, self.b.copy()

    def get_bclabel(self):
        r"""Returns a :class:`~graph_tool.PropertyMap` corresponding to constraint
        labels for the block graph."""

        block_labels = self.bg.new_vertex_property("int")
        # Presumably: pick one member vertex per block, then replace it by
        # that vertex's constraint label (both helpers live elsewhere in
        # this module -- TODO confirm).
        reverse_map(self.b, block_labels)
        pmap(block_labels, self.clabel)
        return block_labels

    def __check_clabel(self):
        """Check that the partition agrees with the constraint labels.

        The block labels are compared, after relabelling with
        ``continuous_map``, against the labels obtained by additionally
        splitting every block by ``clabel``. Returns whether the two
        relabelled arrays are identical.
        """
        split_by_clabel = self.b.fa + self.clabel.fa * self.B
        plain = self.b.fa.copy()
        continuous_map(split_by_clabel)
        continuous_map(plain)
        return (split_by_clabel == plain).all()

    def __get_emat(self):
        if self.emat is None:
            self.__regen_emat()
        return self.emat

    def __regen_emat(self):
        """Rebuild ``self.emat`` from the graph, the partition and the block graph.

        ``create_emat`` is used while ``B <= max_BE``; above that threshold
        ``create_ehash`` is used instead, which also receives the random
        number generator.
        """
        common = (self.g._Graph__graph,
                  _prop("v", self.g, self.b),
                  self.bg._Graph__graph)
        if self.B <= self.max_BE:
            self.emat = libcommunity.create_emat(*common)
        else:
            self.emat = libcommunity.create_ehash(*(common + (_get_rng(),)))

    def __build_egroups(self, empty=False):
        """Create the edge position maps and build ``self.egroups``.

        Two fresh ``int`` edge property maps (``esrcpos`` and ``etgtpos``) are
        allocated and handed, together with the partition and the edge
        weights, to ``libcommunity.build_egroups``.
        """
        self.esrcpos = self.g.new_edge_property("int")
        self.etgtpos = self.g.new_edge_property("int")

        labels = _prop("v", self.g, self.b)
        weights = _prop("e", self.g, self.eweight)
        src_pos = _prop("e", self.g, self.esrcpos)
        tgt_pos = _prop("e", self.g, self.etgtpos)

        self.egroups = libcommunity.build_egroups(self.g._Graph__graph,
                                                  self.bg._Graph__graph,
                                                  labels, weights,
                                                  src_pos, tgt_pos,
                                                  self.is_weighted, empty)

    def __build_nsampler(self, empty=False):
        """Build ``self.nsampler`` from the graph and its edge weights."""
        graph = self.g._Graph__graph
        weights = _prop("e", self.g, self.eweight)
        self.nsampler = libcommunity.init_neighbour_sampler(graph, weights,
                                                            True, empty)

    def get_blocks(self):
        r"""Returns the property map which contains the block labels for each vertex."""
        return self.b

    def get_bg(self):
        r"""Returns the block graph."""
        return self.bg

    def get_ers(self):
        r"""Returns the edge property map of the block graph which contains the :math:`e_{rs}` matrix entries.
        For undirected graphs, the diagonal values (self-loops) contain :math:`e_{rr}/2`."""
        return self.mrs

    def get_er(self):
        r"""Returns the vertex property map of the block graph which contains the number
        :math:`e_r` of half-edges incident on block :math:`r`. If the graph is
        directed, a pair of property maps is returned, with the number of
        out-edges :math:`e^+_r` and in-edges :math:`e^-_r`, respectively."""
        if self.bg.is_directed():
            return self.mrp, self.mrm
        else:
            return self.mrp

    def get_nr(self):
        r"""Returns the vertex property map of the block graph which contains the block sizes :math:`n_r`."""
        return self.wr

    def entropy(self, complete=True, dl=False, partition_dl=True,
                degree_dl=True, edges_dl=True, dense=False, multigraph=True,
                norm=False, dl_ent=False, **kwargs):
        r"""Calculate the entropy associated with the current block partition.

        Parameters
        ----------
        complete : ``bool`` (optional, default: ``False``)
            If ``True``, the complete entropy will be returned, including constant
            terms not relevant to the block partition.
        dl : ``bool`` (optional, default: ``False``)
            If ``True``, the full description length will be returned.
        partition_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the partition description length
            will be considered.
        edges_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the edge matrix description length
            will be considered.
        degree_dl : ``bool`` (optional, default: ``True``)
            If ``True``, and ``dl == True`` the degree sequence description
            length will be considered.
        dense : ``bool`` (optional, default: ``False``)
            If ``True``, the "dense" variant of the entropy will be computed.
        multigraph : ``bool`` (optional, default: ``False``)
            If ``True``, the multigraph entropy will be used.
        norm : ``bool`` (optional, default: ``True``)
            If ``True``, the entropy will be "normalized" by dividing by the
            number of edges.
        dl_ent : ``bool`` (optional, default: ``False``)
            If ``True``, the description length of the degree sequence will be
            approximated by its entropy.

        Notes
        -----
        For the traditional blockmodel (``deg_corr == False``), the entropy is
        given by

        .. math::

          \mathcal{S}_t &\cong E - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), \\
          \mathcal{S}^d_t &\cong E - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right),

        for undirected and directed graphs, respectively, where :math:`e_{rs}`
        is the number of edges from block :math:`r` to :math:`s` (or the number
        of half-edges for the undirected case when :math:`r=s`), and :math:`n_r`
        is the number of vertices in block :math:`r` .

        For the degree-corrected variant with "hard" degree constraints the
        equivalent expressions are

        .. math::

            \mathcal{S}_c &\cong -E -\sum_kN_k\ln k! - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e_re_s}\right), \\
            \mathcal{S}^d_c &\cong -E -\sum_{k^+}N_{k^+}\ln k^+!  -\sum_{k^-}N_{k^-}\ln k^-! - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e^+_re^-_s}\right),

        where :math:`e_r = \sum_se_{rs}` is the number of half-edges incident on
        block :math:`r`, and :math:`e^+_r = \sum_se_{rs}` and :math:`e^-_r =
        \sum_se_{sr}` are the numbers of out- and in-edges adjacent to block
        :math:`r`, respectively.

        If ``dense == False`` and ``multigraph == True``, the entropy used will
        be of the "Poisson" model, with the additional term:

        .. math::

            {\mathcal{S}_{cm}^{(d)}} = \mathcal{S}_c^{(d)} + \sum_{i>j} \ln A_{ij}! + \sum_i \ln A_{ii}!!


        If ``dl == True``, the description length :math:`\mathcal{L}_t` of the
        model will be returned as well, as described in
        :func:`model_entropy`. Note that for the degree-corrected version the
        description length is

        .. math::

            \mathcal{L}_c = \mathcal{L}_t + \sum_r\min\left(\mathcal{L}^{(1)}_r, \mathcal{L}^{(2)}_r\right),

        with

        .. math::

              \mathcal{L}^{(1)}_r &= \ln{\left(\!\!{n_r \choose e_r}\!\!\right)}, \\
              \mathcal{L}^{(2)}_r &= \ln\Xi_r + \ln n_r! - \sum_k \ln n^r_k!,

        and :math:`\ln\Xi_r \simeq 2\sqrt{\zeta(2)e_r}`, where :math:`\zeta(x)`
        is the `Riemann zeta function
        <https://en.wikipedia.org/wiki/Riemann_zeta_function>`_, and
        :math:`n^r_k` is the number of nodes in block :math:`r` with degree
        :math:`k`. For directed graphs we have instead :math:`k \to (k^-, k^+)`,
        and :math:`\ln\Xi_r \to \ln\Xi^+_r + \ln\Xi^-_r` with :math:`\ln\Xi_r^+
        \simeq 2\sqrt{\zeta(2)e^+_r}` and :math:`\ln\Xi_r^- \simeq
        2\sqrt{\zeta(2)e^-_r}`.

        If ``dl_ent=True`` is passed, this will be approximated instead by

        .. math::

            \mathcal{L}_c \simeq \mathcal{L}_t - \sum_rn_r\sum_kp^r_k\ln p^r_k,

        where :math:`p^r_k = n^r_k / n_r`.

        If the "dense" entropies are requested (``dense=True``), they will be
        computed as

        .. math::

            \mathcal{S}_t  &= \sum_{r>s} \ln{\textstyle {n_rn_s \choose e_{rs}}} + \sum_r \ln{\textstyle {{n_r\choose 2}\choose e_{rr}/2}}\\
            \mathcal{S}^d_t  &= \sum_{rs} \ln{\textstyle {n_rn_s \choose e_{rs}}},

        for simple graphs, and

        .. math::

            \mathcal{S}_m  &= \sum_{r>s} \ln{\textstyle \left(\!\!{n_rn_s \choose e_{rs}}\!\!\right)} + \sum_r \ln{\textstyle \left(\!\!{\left(\!{n_r\choose 2}\!\right)\choose e_{rr}/2}\!\!\right)}\\
            \mathcal{S}^d_m  &= \sum_{rs} \ln{\textstyle \left(\!\!{n_rn_s \choose e_{rs}}\!\!\right)},

        for multigraphs (i.e. ``multigraph == True``). A dense entropy for the
        degree-corrected model is not available, and if requested will return a
        :exc:`NotImplementedError`.

        If ``complete == False`` constants in the above equations which do not
        depend on the partition of the nodes will be omitted.

        Note that in all cases if ``norm==True`` the value returned corresponds
        to the entropy `per edge`, i.e. :math:`(\mathcal{S}_{t/c}\; [\,+\,\mathcal{L}_{t/c}])/ E`.
        """

        xi_fast = kwargs.get("xi_fast", False)
        dl_deg_alt = kwargs.get("dl_deg_alt", True)

        E = self.E
        N = self.N

        if dense:
            if self.deg_corr:
                raise NotImplementedError('A degree-corrected "dense" entropy is not yet implemented')

            S = libcommunity.entropy_dense(self.bg._Graph__graph,
                                            _prop("e", self.bg, self.mrs),
                                            _prop("v", self.bg, self.wr),
                                            multigraph)
        else:
            S = libcommunity.entropy(self.bg._Graph__graph,
                                      _prop("e", self.bg, self.mrs),
                                      _prop("v", self.bg, self.mrp),
                                      _prop("v", self.bg, self.mrm),
                                      _prop("v", self.bg, self.wr),
                                      self.deg_corr)

            if _bm_test():
                assert not isnan(S) and not isinf(S), "invalid entropy %g (%s) " % (S, str(dict(complete=complete,
                                                                                                random=random, dl=dl,
                                                                                                partition_dl=partition_dl,
                                                                                                edges_dl=edges_dl,
                                                                                                dense=dense, multigraph=multigraph,
                                                                                                norm=norm)))
            if self.deg_corr:
                S -= E
            else:
                S += E

            if complete:
                if self.deg_corr:
                    S += libcommunity.deg_entropy_term(self.g._Graph__graph,
                                                       libcore.any(),
                                                       self.overlap_stats,
                                                       self.N,
                                                       _prop("e", self.g, self.eweight),
                                                       _prop("v", self.g, self.ignore_degrees))

                if multigraph:
                    S += libcommunity.entropy_parallel(self.g._Graph__graph,
                                                       _prop("e", self.g, self.eweight))

                if _bm_test():
                    assert not isnan(S) and not isinf(S), "invalid entropy %g (%s) " % (S, str(dict(complete=complete,
                                                                                                    random=random, dl=dl,
                                                                                                    partition_dl=partition_dl,
                                                                                                    edges_dl=edges_dl,
                                                                                                    dense=dense, multigraph=multigraph,
                                                                                                    norm=norm)))
        if dl:
            if partition_dl:
                if self.partition_stats.is_enabled():
                    S += self.partition_stats.get_partition_dl()
                else:
                    self.__init_partition_stats(empty=False)
                    S += self.partition_stats.get_partition_dl()
                    self.__init_partition_stats(empty=True)

                if _bm_test():
                    assert not isnan(S) and not isinf(S), "invalid entropy %g (%s) " % (S, str(dict(complete=complete,
                                                                                                    random=random, dl=dl,
                                                                                                    partition_dl=partition_dl,
                                                                                                    edges_dl=edges_dl,
                                                                                                    dense=dense, multigraph=multigraph,
                                                                                                    norm=norm)))
            if edges_dl:
                actual_B = (self.wr.a > 0).sum()
                S += model_entropy(actual_B, N, E, directed=self.g.is_directed(), nr=False)

            if self.deg_corr and degree_dl:
                if self.partition_stats.is_enabled():
                    S_seq = self.partition_stats.get_deg_dl(dl_ent, dl_deg_alt, xi_fast)
                else:
                    self.__init_partition_stats(empty=False)
                    S_seq = self.partition_stats.get_deg_dl(dl_ent, dl_deg_alt, xi_fast)
                    self.__init_partition_stats(empty=True)

                S += S_seq

                if _bm_test():
                    assert not isnan(S_seq) and not isinf(S_seq), "invalid entropy %g (%s) " % (S_seq, str(dict(complete=complete,
                                                                                                                random=random, dl=dl,
                                                                                                                partition_dl=partition_dl,
                                                                                                                edges_dl=edges_dl,
                                                                                                                dense=dense, multigraph=multigraph,
                                                                                                                norm=norm)))

        if _bm_test():
            assert not isnan(S) and not isinf(S), "invalid entropy %g (%s) " % (S, str(dict(complete=complete,
                                                                                            random=random, dl=dl,
                                                                                            partition_dl=partition_dl,
                                                                                            edges_dl=edges_dl,
                                                                                            dense=dense, multigraph=multigraph,
                                                                                            norm=norm)))

        if norm:
            return S / E
        else:
            return S

    def get_matrix(self):
        r"""Returns the block matrix (as a sparse :class:`~scipy.sparse.csr_matrix`),
        holding the number of edges between each pair of blocks.

        It is computed as the adjacency matrix of the block graph, weighted by
        the block edge counts.

        .. warning::

           This corresponds to the adjacency matrix of the block graph, which by
           convention includes twice the amount of edges in the diagonal entries
           if the graph is undirected.

        Examples
        --------

        .. testsetup:: get_matrix

           gt.seed_rng(42)
           np.random.seed(42)
           from pylab import *

        .. doctest:: get_matrix

           >>> g = gt.collection.data["polbooks"]
           >>> state = gt.BlockState(g, B=5, deg_corr=True)
           >>> for i in range(1000):
           ...     ds, nmoves = gt.mcmc_sweep(state)
           >>> m = state.get_matrix()
           >>> figure()
           <...>
           >>> matshow(m.todense())
           <...>
           >>> savefig("bloc_mat.pdf")

        .. testcleanup:: get_matrix

           savefig("bloc_mat.png")

        .. figure:: bloc_mat.*
           :align: center

           A  5x5 block matrix.

        """

        block_graph = self.bg
        edge_counts = self.mrs
        return adjacency(block_graph, weight=edge_counts)


def model_entropy(B, N, E, directed=False, nr=None):
    r"""Computes the amount of information necessary for the parameters of the
    traditional blockmodel ensemble, for ``B`` blocks, ``N`` vertices, ``E``
    edges, and either a directed or undirected graph.

    A traditional blockmodel is defined as a set of :math:`N` vertices which can
    belong to one of :math:`B` blocks, and the matrix :math:`e_{rs}` describes
    the number of edges from block :math:`r` to :math:`s` (or twice that number
    if :math:`r=s` and the graph is undirected).

    For an undirected graph, the number of distinct :math:`e_{rs}` matrices is given by,

    .. math::

       \Omega_m = \left(\!\!{\left(\!{B \choose 2}\!\right) \choose E}\!\!\right)

    and for a directed graph,

    .. math::
       \Omega_m = \left(\!\!{B^2 \choose E}\!\!\right)


    where :math:`\left(\!{n \choose k}\!\right) = {n+k-1\choose k}` is the
    number of :math:`k` combinations with repetitions from a set of size :math:`n`.

    The total information necessary to describe the model is then,

    .. math::

       \mathcal{L}_t = \ln\Omega_m + \ln\left(\!\!{B \choose N}\!\!\right) + \ln N! - \sum_r \ln n_r!,


    where the remaining term is the information necessary to describe the
    block partition, where :math:`n_r` is the number of nodes in block :math:`r`.

    If ``nr`` is ``None``, it is assumed :math:`n_r=N/B`. If ``nr`` is
    ``False``, the partition term is left out and only :math:`\ln\Omega_m` is
    returned.

    References
    ----------

    .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks",
       Phys. Rev. Lett. 110, 148701 (2013), :doi:`10.1103/PhysRevLett.110.148701`, :arxiv:`1212.4794`.
    .. [peixoto-hierarchical-2014] Tiago P. Peixoto, "Hierarchical block structures and high-resolution
       model selection in large networks ", Phys. Rev. X 4, 011047 (2014), :doi:`10.1103/PhysRevX.4.011047`,
       :arxiv:`1310.4377`.

    """

    # number of block pairs available to place an edge on
    x = (B * B) if directed else (B * (B + 1)) / 2

    # log of the number of edge count matrices
    L = lbinom(x + E - 1, E)
    if nr is not False:
        L = L + partition_entropy(B, N, nr)
    return L

def lbinom(n, k):
    """Return the logarithm of the binomial coefficient ``n`` choose ``k``.

    It is evaluated with the log-gamma function, as
    ``gammaln(n + 1) - gammaln(n - k + 1) - gammaln(k + 1)``, after converting
    each argument with ``float()``.
    """
    lgamma = scipy.special.gammaln
    total = lgamma(float(n + 1))
    total = total - lgamma(float(n - k + 1))
    return total - lgamma(float(k + 1))

def lbinom_careful(n, k):
    """Return ``libcommunity.lbinom_careful(n, k)``.

    Thin wrapper around the function of the same name in the compiled
    module; presumably a log-binomial variant, see :func:`lbinom`
    (TODO confirm).
    """
    result = libcommunity.lbinom_careful(n, k)
    return result

def lbinom_fast(n, k):
    """Return ``libcommunity.lbinom_fast(n, k)``.

    Thin wrapper around the function of the same name in the compiled
    module; presumably a log-binomial variant, see :func:`lbinom`
    (TODO confirm).
    """
    result = libcommunity.lbinom_fast(n, k)
    return result

def partition_entropy(B, N, nr=None):
    if nr is None:
        S = N * log(B) + log1p(-(1 - 1./B) ** N)
    else:
        S = lbinom(B + N - 1, N) + scipy.special.gammaln(N + 1) - scipy.special.gammaln(nr + 1).sum()
    return S

def get_max_B(N, E, directed=False):
    r"""Return the maximum detectable number of blocks, obtained by minimizing:

    .. math::

        \mathcal{L}_t(B, N, E) - E\ln B

    where :math:`\mathcal{L}_t(B, N, E)` is the information necessary to
    describe a traditional blockmodel with `B` blocks, `N` nodes and `E`
    edges (see :func:`model_entropy`).

    The minimization is a bounded scalar search over :math:`B\in[1, E]`. The
    result is rounded up, and is never smaller than ``2`` nor larger than
    ``N``.

    Examples
    --------

    >>> gt.get_max_B(N=1e6, E=5e6)
    1572

    References
    ----------
    .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks",
       Phys. Rev. Lett. 110, 148701 (2013), :doi:`10.1103/PhysRevLett.110.148701`, :arxiv:`1212.4794`.


    """

    def per_edge_objective(nblocks):
        # -ln(B) plus the model description length per edge.
        return -log(nblocks) + model_entropy(nblocks, N, E, directed) / E

    best = fminbound(per_edge_objective, 1, E, xtol=1e-6, maxfun=1500,
                     disp=0)
    if isnan(best):
        # The search produced NaN: fall back to a single block.
        best = 1

    at_least_two = max(int(ceil(best)), 2)
    return min(N, at_least_two)

def get_akc(B, I, N=numpy.inf, directed=False):
    r"""Return the minimum value of the average degree of the network, so that some block structure with :math:`B` blocks can be detected, according to the minimum description length criterion.

    This is obtained by solving

    .. math::

       \Sigma_b = \mathcal{L}_t(B, N, E) - E\mathcal{I}_{t/c} = 0,

    where :math:`\mathcal{L}_{t}` is the necessary information to describe the
    blockmodel parameters (see :func:`model_entropy`), and
    :math:`\mathcal{I}_{t/c}` characterizes the planted block structure, and is
    given by

    .. math::

        \mathcal{I}_t &= \sum_{rs}m_{rs}\ln\left(\frac{m_{rs}}{w_rw_s}\right),\\
        \mathcal{I}_c &= \sum_{rs}m_{rs}\ln\left(\frac{m_{rs}}{m_rm_s}\right),

    where :math:`m_{rs} = e_{rs}/2E` (or :math:`m_{rs} = e_{rs}/E` for directed
    graphs) and :math:`w_r=n_r/N`. We note that :math:`\mathcal{I}_{t/c}\in[0,
    \ln B]`. In the case where :math:`E \gg B^2`, this simplifies to

    .. math::

       \left<k\right>_c &= \frac{2\ln B}{\mathcal{I}_{t/c}},\\
       \left<k^{-/+}\right>_c &= \frac{\ln B}{\mathcal{I}_{t/c}},

    for undirected and directed graphs, respectively. This limit is assumed if
    ``N == inf``.

    Parameters
    ----------
    B : ``int``
        Number of blocks.
    I : ``float``
        Value of :math:`\mathcal{I}_{t/c}`.
    N : ``int`` or ``float`` (optional, default: ``inf``)
        Number of nodes. If ``inf``, the closed-form limit above is returned.
    directed : ``bool`` (optional, default: ``False``)
        Whether the graph is directed.

    Returns
    -------
    ak : ``float``
        The threshold average degree.

    Examples
    --------

    >>> gt.get_akc(10, log(10) / 100, N=100)
    2.414413200430159

    References
    ----------
    .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks",
       Phys. Rev. Lett. 110, 148701 (2013), :doi:`10.1103/PhysRevLett.110.148701`, :arxiv:`1212.4794`.

    """
    if N == numpy.inf:
        # Closed-form limit; the structure strength is the parameter `I`.
        ak = 2 * log(B) / I
        if directed:
            ak /= 2
        return ak

    # NOTE(review): the residual solved numerically below has the form
    # L_t / E - E * I, which is not literally the \Sigma_b expression of the
    # docstring -- confirm which one is intended. The undirected expression is
    # kept exactly as it was.
    if directed:
        # E = N * <k>; the description length is divided by E = N * ak, in
        # parallel with the undirected case.
        def get_dl(ak):
            return model_entropy(B, N, N * ak, directed) / (N * ak) - N * ak * I
    else:
        # E = N * <k> / 2.
        def get_dl(ak):
            return model_entropy(B, N, N * ak / 2., directed) * 2 / (N * ak) - N * ak * I / 2.

    # fsolve returns a one-element array for a scalar starting point; index it
    # explicitly instead of relying on implicit array-to-scalar conversion.
    root = fsolve(get_dl, 10)
    return float(root[0])

def mcmc_sweep(state, beta=1., c=1., niter=1, dl=False, dense=False,
               multigraph=False, node_coherent=False, confine_layers=False,
               sequential=True, parallel=False, vertices=None,
               target_blocks=None, verbose=False, **kwargs):
    r"""Performs a Markov chain Monte Carlo sweep on the network, to sample the block partition according to a probability :math:`\propto e^{-\beta \mathcal{S}_{t/c}}`, where :math:`\mathcal{S}_{t/c}` is the blockmodel entropy.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`, :class:`~graph_tool.community.OverlapBlockState` or :class:`~graph_tool.community.CovariateBlockState`
        The block state.
    beta : ``float`` (optional, default: `1.0`)
        The inverse temperature parameter :math:`\beta`.
    c : ``float`` (optional, default: ``1.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    niter : ``int`` (optional, default: ``1``)
        Number of sweeps to perform.
    dl : ``bool`` (optional, default: ``False``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    multigraph : ``bool`` (optional, default: ``False``)
        If ``True``, the multigraph entropy will be used. Only has an effect
        if ``dense == True``.
    node_coherent : ``bool`` (optional, default: ``False``)
        If ``True``, and if the ``state`` is an instance of
        :class:`~graph_tool.community.OverlapBlockState`, then all half-edges
        incident on the same node are moved simultaneously.
    confine_layers : ``bool`` (optional, default: ``False``)
        If ``True``, and if the ``state`` is an instance of
        :class:`~graph_tool.community.CovariateBlockState`, with an
        *overlapping* partition, the half edges will only be moved in such a way
        that inside each layer the group membership remains non-overlapping.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` move attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    parallel : ``bool`` (optional, default: ``False``)
        If ``True``, the updates are performed in parallel (multiple
        threads).

        .. warning::

            If this is used, the Markov Chain is not guaranteed to be sampled with
            the correct probabilities. This is better used in conjunction with
            ``beta=float('inf')``, where this is not an issue.

    vertices : ``list of ints`` (optional, default: ``None``)
        A list of vertices which will be attempted to be moved. If ``None``, all
        vertices will be attempted.
    target_blocks : ``list of ints`` (optional, default: ``None``)
        A list of groups to which the corresponding vertices will be forcibly
        moved. If ``None``, the standard MCMC rules will be applied.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.
    ``**kwargs`` : (optional)
        Additional options read by this function: ``nmerges`` (default ``0``),
        ``merge_map`` (default ``None``), ``coherent_merge`` (default
        ``False``), ``edges_dl`` (default ``False``), ``block_list`` (default
        ``None``) and ``unweighted_merge`` (default ``False``). If
        ``nmerges > 0``, ``beta`` is overridden with ``inf``.

    Returns
    -------

    dS : ``float``
       The entropy difference (in nats) after the sweeps.
    nmoves : ``int``
       The number of accepted block membership moves.

    If the state has a single block (``state.B == 1``), ``(0., 0)`` is returned
    immediately.

    Raises
    ------

    ValueError
        If ``target_blocks`` is given and its length differs from the length of
        the list of vertices to be swept.


    Notes
    -----

    This algorithm performs a Markov chain Monte Carlo sweep on the network,
    where the block memberships are randomly moved, and either accepted or
    rejected, so that after sufficiently many sweeps the partitions are sampled
    with probability proportional to :math:`e^{-\beta\mathcal{S}_{t/c}}`, where
    :math:`\mathcal{S}_{t/c}` is the blockmodel entropy, given by

    .. math::

      \mathcal{S}_t &\cong - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right), \\
      \mathcal{S}^d_t &\cong - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{n_rn_s}\right),

    for undirected and directed traditional blockmodels (``deg_corr == False``),
    respectively, where :math:`e_{rs}` is the number of edges from block
    :math:`r` to :math:`s` (or the number of half-edges for the undirected case
    when :math:`r=s`), and :math:`n_r` is the number of vertices in block
    :math:`r`, and constant terms which are independent of the block partition
    were dropped (see :meth:`BlockState.entropy` for the complete entropy). For
    the degree-corrected variant with "hard" degree constraints the equivalent
    expressions are

    .. math::

       \mathcal{S}_c &\cong  - \frac{1}{2} \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e_re_s}\right), \\
       \mathcal{S}^d_c &\cong - \sum_{rs}e_{rs}\ln\left(\frac{e_{rs}}{e^+_re^-_s}\right),

    where :math:`e_r = \sum_se_{rs}` is the number of half-edges incident on
    block :math:`r`, and :math:`e^+_r = \sum_se_{rs}` and :math:`e^-_r =
    \sum_se_{sr}` are the number of out- and in-edges adjacent to block
    :math:`r`, respectively.

    The Monte Carlo algorithm employed attempts to improve the mixing time of
    the Markov chain by proposing membership moves :math:`r\to s` with
    probability :math:`p(r\to s|t) \propto e_{ts} + c`, where :math:`t` is the
    block label of a random neighbour of the vertex being moved. See
    [peixoto-efficient-2014]_ for more details.

    This algorithm has a complexity of :math:`O(E)`, where :math:`E` is the
    number of edges in the network.

    Examples
    --------
    .. testsetup:: mcmc

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: mcmc

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.BlockState(g, B=3, deg_corr=True)
       >>> pv = None
       >>> for i in range(1000):        # remove part of the transient
       ...     ds, nmoves = gt.mcmc_sweep(state)
       >>> for i in range(1000):
       ...     ds, nmoves = gt.mcmc_sweep(state)
       ...     pv = gt.collect_vertex_marginals(state, pv)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft.pdf")
       <...>

    .. testcleanup:: mcmc

       gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft.png")

    .. figure:: polbooks_blocks_soft.*
       :align: center

       "Soft" block partition of a political books network with :math:`B=3`.

    References
    ----------

    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey,
       Samuel Leinhardt, "Stochastic blockmodels: First steps",
       Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A.,
       :doi:`10.1016/0378-8733(83)90021-7`
    .. [faust-blockmodels-1992] Katherine Faust, and Stanley
       Wasserman. "Blockmodels: Interpretation and Evaluation." Social Networks
       14, no. 1-2 (1992): 5-61. :doi:`10.1016/0378-8733(92)90013-W`
    .. [karrer-stochastic-2011] Brian Karrer, and M. E. J. Newman. "Stochastic
       Blockmodels and Community Structure in Networks." Physical Review E 83,
       no. 1 (2011): 016107. :doi:`10.1103/PhysRevE.83.016107`.
    .. [peixoto-entropy-2012] Tiago P. Peixoto "Entropy of Stochastic Blockmodel
       Ensembles." Physical Review E 85, no. 5 (2012): 056122.
       :doi:`10.1103/PhysRevE.85.056122`, :arxiv:`1112.6028`.
    .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module
       inference in large networks", Phys. Rev. Lett. 110, 148701 (2013),
       :doi:`10.1103/PhysRevLett.110.148701`, :arxiv:`1212.4794`.
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89,
       012804 (2014), :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
    .. [peixoto-model-2016] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       Phys. Rev. X 5, 011033 (2016), :doi:`10.1103/PhysRevX.5.011033`,
       :arxiv:`1409.3059`.
    """

    # Extra options that are not part of the explicit signature.
    nmerges = kwargs.get("nmerges", 0)
    merge_map = kwargs.get("merge_map", None)
    coherent_merge = kwargs.get("coherent_merge", False)
    edges_dl = kwargs.get("edges_dl", False)
    block_list  = kwargs.get("block_list", None)

    # With a single block there is nothing to move.
    if state.B == 1:
        return 0., 0

    # Vertices to sweep: an explicit list is copied into a native vector and
    # stored on the state; otherwise the stored list is rebuilt from all
    # vertex indices when it is missing or shorter than the vertex count.
    if vertices is not None:
        temp = libcommunity.get_vector(len(vertices))
        temp.a = vertices
        vertices = temp
        state.sweep_vertices = vertices
    elif (state.sweep_vertices is None or
          len(state.sweep_vertices.a) < state.g.num_vertices()):
        vertices = libcommunity.get_vector(state.g.num_vertices())
        vertices.a = state.g.vertex_index.copy("int").fa
        state.sweep_vertices = vertices

    # Forced target blocks must pair up one-to-one with the swept vertices;
    # an empty vector is passed when no targets are given.
    if target_blocks is not None:
        temp = libcommunity.get_vector(len(target_blocks))
        temp.a = target_blocks
        target_blocks = temp
        if len(target_blocks) != len(state.sweep_vertices):
            raise ValueError("'target_blocks' must have the same length as 'vertices'")
    else:
        target_blocks = libcommunity.get_vector(0)

    # Block list: either the one supplied via kwargs, or 0..B-1 (rebuilt
    # whenever the stored list is missing or its length differs from B).
    if block_list is not None:
        state.block_list = libcommunity.get_vector(len(block_list))
        state.block_list.a = block_list
    elif state.block_list is None or len(state.block_list) != state.B:
        state.block_list = libcommunity.get_vector(state.B)
        state.block_list.a = arange(state.B)

    # c == inf selects fully random move proposals.
    random_move = c == numpy.inf

    bclabel = state.get_bclabel()

    # Without merges (or without a supplied map) use the vertex index itself
    # as the merge map.
    if nmerges == 0 or merge_map is None:
        merge_map = state.g.vertex_index.copy("int")

    # Merges are always performed at infinite inverse temperature.
    if nmerges > 0:
        beta = numpy.inf

    nsampler = []
    ncavity_sampler = []

    # A covariate state is handled through its list of per-layer states; any
    # other state is treated as a single-element list.
    main_state = state
    if isinstance(state, CovariateBlockState):
        states = state.states
        covariate = True
    else:
        states = [state]
        covariate = False

    # Prepare edge groups, neighbour samplers and partition statistics for
    # every (layer) state. NOTE: this loop rebinds the name `state`.
    for l, state in enumerate(states):

        if l == 0 and (random_move or nmerges > 0):
            state._BlockState__build_egroups(empty=True)
        elif state.egroups is None:
            state._BlockState__build_egroups(empty=False)

        if nmerges == 0:
            # Regular sweep: reuse (or build) the sampler stored on the state;
            # the same object is used for both sampler lists.
            if state.nsampler is None:
                state._BlockState__build_nsampler(empty=state.overlap)
            nsampler.append(state.nsampler)
            ncavity_sampler.append(state.nsampler)
        else:
            # Merge sweep: build fresh samplers on the block graph, using the
            # edge counts `mrs` as mask, or a 0/1 version of them when
            # `unweighted_merge` is set.
            if not kwargs.get("unweighted_merge", False):
                emask = state.mrs
            else:
                emask = state.mrs.copy()
                emask.a = emask.a > 0

            nsampler.append(libcommunity.init_neighbour_sampler(state.bg._Graph__graph,
                                                                _prop("e", state.bg, emask),
                                                                True, False))
            ncavity_sampler.append(libcommunity.init_neighbour_sampler(state.bg._Graph__graph,
                                                                       _prop("e", state.bg, emask),
                                                                       False, False))

        # Re-initialize the partition statistics whenever their enabled status
        # or the `edges_dl` setting does not match what this sweep requires.
        # For master/slave layers of a covariate state, only the master has
        # them enabled.
        dl_enable = dl
        if dl and covariate and (state.slave or state.master):
            dl_enable = state.master
        if state.partition_stats.is_enabled() != dl_enable or edges_dl != state.edges_dl:
            if state.overlap:
                state._OverlapBlockState__init_partition_stats(empty=not dl_enable, edges_dl=edges_dl)
            else:
                state._BlockState__init_partition_stats(empty=not dl_enable, edges_dl=edges_dl)

    # Test mode: record the entropy before the sweep for the consistency
    # check performed at the end.
    if _bm_test():
        assert main_state._BlockState__check_clabel(), "clabel already invalid!"
        S = main_state.entropy(dense=dense, multigraph=multigraph,
                               complete=False, dl=dl, edges_dl=edges_dl,
                               dl_deg_alt=False, xi_fast=True)
        assert not (isinf(S) or isnan(S)), "invalid entropy before sweep: %g" % S

    # Initialized to a positive value so that the `finally` clause below
    # resets the covariate block graph even if the native call raises before
    # `nmoves` is assigned.
    nmoves = 1
    try:
        if not covariate:
            state = states[0]
            if not state.overlap:
                dS, nmoves = libcommunity.move_sweep(state.g._Graph__graph,
                                                     state.bg._Graph__graph,
                                                     state._BlockState__get_emat(),
                                                     nsampler[0], ncavity_sampler[0],
                                                     _prop("e", state.bg, state.mrs),
                                                     _prop("v", state.bg, state.mrp),
                                                     _prop("v", state.bg, state.mrm),
                                                     _prop("v", state.bg, state.wr),
                                                     _prop("v", state.g, state.b),
                                                     _prop("v", state.bg, bclabel),
                                                     state.sweep_vertices,
                                                     state.block_list,
                                                     target_blocks,
                                                     state.deg_corr, dense, multigraph,
                                                     _prop("e", state.g, state.eweight),
                                                     _prop("v", state.g, state.vweight),
                                                     state.egroups,
                                                     _prop("e", state.g, state.esrcpos),
                                                     _prop("e", state.g, state.etgtpos),
                                                     float(beta), sequential,
                                                     parallel, random_move,
                                                     c, state.is_weighted,
                                                     nmerges,
                                                     _prop("v", state.g, merge_map),
                                                     niter,
                                                     state.partition_stats,
                                                     verbose, _get_rng())
            else:
                # NOTE(review): `multigraph` is passed twice in a row below,
                # unlike in the non-overlapping call -- confirm against the
                # native signature of `move_sweep_overlap`.
                dS, nmoves = libcommunity.move_sweep_overlap(state.g._Graph__graph,
                                                             state.bg._Graph__graph,
                                                             state._BlockState__get_emat(),
                                                             nsampler[0],
                                                             ncavity_sampler[0],
                                                             _prop("e", state.bg, state.mrs),
                                                             _prop("v", state.bg, state.mrp),
                                                             _prop("v", state.bg, state.mrm),
                                                             _prop("v", state.bg, state.wr),
                                                             _prop("v", state.g, state.b),
                                                             _prop("v", state.bg, bclabel),
                                                             state.sweep_vertices,
                                                             state.block_list,
                                                             target_blocks,
                                                             state.deg_corr, dense, multigraph,
                                                             multigraph,
                                                             _prop("e", state.g, state.eweight),
                                                             _prop("v", state.g, state.vweight),
                                                             state.egroups,
                                                             _prop("e", state.g, state.esrcpos),
                                                             _prop("e", state.g, state.etgtpos),
                                                             float(beta),
                                                             sequential, parallel,
                                                             random_move, float(c),
                                                             ((nmerges == 0 and node_coherent) or
                                                              (nmerges > 0 and coherent_merge)),
                                                             state.is_weighted,
                                                             nmerges,
                                                             _prop("v", state.g, merge_map),
                                                             niter,
                                                             state.overlap_stats,
                                                             state.partition_stats,
                                                             verbose, _get_rng())
        else:
            if _bm_test():
                for l, state in enumerate(states):
                    assert state.mrs.fa.sum() == state.eweight.fa.sum(), (l, state.mrs.fa.sum(), state.eweight.fa.sum())
                    #assert state.mrs.a.sum() == state.eweight.a.sum(), (l, state.mrs.a.sum(), state.eweight.a.sum())

            # Confining moves to layers implies node-coherent moves.
            if confine_layers:
                node_coherent = True

            # NOTE(review): the `bclabel` argument below is wrapped using
            # `state.bg`, where `state` is whatever the earlier loop over
            # `states` left bound (its last element), not `main_state` --
            # confirm this is intended.
            dS, nmoves = libcommunity.cov_move_sweep(main_state.g._Graph__graph,
                                                     _prop("e", main_state.g, main_state.ec),
                                                     _prop("v", main_state.g, main_state.vc),
                                                     _prop("v", main_state.g, main_state.vmap),
                                                     [state.g._Graph__graph for state in states],
                                                     [state.bg._Graph__graph for state in states],
                                                     [state._BlockState__get_emat() for state in states],
                                                     nsampler, ncavity_sampler,
                                                     [_prop("e", state.bg, state.mrs) for state in states],
                                                     [_prop("v", state.bg, state.mrp) for state in states],
                                                     [_prop("v", state.bg, state.mrm) for state in states],
                                                     [_prop("v", state.bg, state.wr) for state in states],
                                                     _prop("v", main_state.g, main_state.b),
                                                     [_prop("v", state.g, state.b) for state in states],
                                                     main_state.bmap,
                                                     [_prop("v", state.g, state.g.vp["brmap"]) for state in states],
                                                     [state.free_blocks for state in states],
                                                     [state.master for state in states],
                                                     [state.slave for state in states],
                                                     _prop("v",  state.bg, bclabel),
                                                     [main_state.sweep_vertices, main_state.block_list, target_blocks],
                                                     main_state.deg_corr, dense, multigraph,
                                                     [_prop("e", state.g, state.eweight) for state in states],
                                                     [_prop("v", state.g, state.vweight) for state in states],
                                                     [state.egroups for state in states],
                                                     [_prop("e", state.g, state.esrcpos) for state in states],
                                                     [_prop("e", state.g, state.etgtpos) for state in states],
                                                     float(beta), sequential,
                                                     parallel, random_move,
                                                     (node_coherent, confine_layers),
                                                     c, main_state.is_weighted,
                                                     nmerges,
                                                     _prop("v", main_state.g, merge_map),
                                                     niter, main_state.B,
                                                     [state.partition_stats for state in states] if not main_state.overlap else [],
                                                     [state.partition_stats for state in states] if main_state.overlap else [],
                                                     [state.overlap_stats for state in states],
                                                     verbose, _get_rng())
    finally:
        # Discard the per-state caches that were set up specially for random
        # moves or merges, so that they are rebuilt on the next regular sweep.
        for state in states:
            if random_move:
                state.egroups = None
            if nmerges > 0:
                state.nsampler = None
                state.egroups = None
        # Reset the covariate state's block graph attribute if any move may
        # have been made.
        if covariate and nmoves > 0:
            main_state._CovariateBlockState__bg = None

        # Clear the native lookup caches.
        libcommunity.clear_safelog()
        libcommunity.clear_xlogx()
        libcommunity.clear_lgamma()

    # Test mode: the entropy difference reported by the native sweep must
    # match the difference of the entropies computed before and after.
    # NOTE(review): `state.E` below refers to the last element of `states`
    # (left bound by the `finally` loop), not necessarily `main_state`.
    if _bm_test():
        assert main_state._BlockState__check_clabel(), "clabel invalidated!"
        assert not (isinf(dS) or isnan(dS)), "invalid after sweep: %g" % dS
        if not covariate or nmerges == 0:
            S2 = main_state.entropy(dense=dense, multigraph=multigraph,
                                    complete=False, dl=dl, edges_dl=edges_dl,
                                    dl_deg_alt=False, xi_fast=True)
            c_dS = S2 - S
            if not abs(dS - c_dS) < 1e-6 * state.E:
                S3 = main_state.copy().entropy(dense=dense, multigraph=multigraph, complete=False,
                                               dl=dl, edges_dl=False, dl_deg_alt=False, xi_fast=True)
                print(dS, c_dS, nmoves, state.overlap, dense, multigraph,
                      main_state.deg_corr, main_state.is_weighted, node_coherent, beta, S2, S3)
            assert abs(dS - c_dS) < 1e-6 * state.E, "invalid delta S (%g, %g)" % (dS, c_dS)

    return dS, nmoves


def pmap(prop, value_map):
    """Replace, in place, every value of ``prop`` by the entry of
    ``value_map`` found at the position given by that value.

    Both arguments may be property maps (their filtered arrays are used) or
    plain arrays. A :exc:`ValueError` is raised if the largest value in
    ``prop`` is not a valid index into ``value_map``.
    """
    prop_is_pmap = isinstance(prop, PropertyMap)
    values = prop.fa if prop_is_pmap else prop
    table = value_map.fa if isinstance(value_map, PropertyMap) else value_map

    if values.max() >= len(table):
        raise ValueError("value map is not large enough! %s, %s" % (values.max(),
                                                                    len(table)))

    # The native routines expect both arrays to share the same value type.
    if values.dtype != table.dtype:
        table = array(table, dtype=values.dtype)

    if values.dtype == "int64":
        apply_map = libcommunity.vector_map64
    else:
        apply_map = libcommunity.vector_map
    apply_map(values, table)

    # Write the mapped array back into the property map.
    if prop_is_pmap:
        prop.fa = values

def reverse_map(prop, value_map):
    """Update ``value_map`` so that the positions indexed by the values of
    ``prop`` hold the index at which each value occurs in ``prop``.

    Both arguments may be property maps (their filtered arrays are used) or
    plain arrays. A :exc:`ValueError` is raised if the largest value in
    ``prop`` is not a valid index into ``value_map``.
    """
    keys = prop.fa if isinstance(prop, PropertyMap) else prop
    target_is_pmap = isinstance(value_map, PropertyMap)
    slots = value_map.fa if target_is_pmap else value_map

    if keys.max() >= len(slots):
        raise ValueError("value map is not large enough! (%d, %d)" % (keys.max(), len(slots)))

    # The native routines expect both arrays to share the same value type.
    if keys.dtype != slots.dtype:
        keys = array(keys, dtype=slots.dtype)

    if slots.dtype == "int64":
        apply_rmap = libcommunity.vector_rmap64
    else:
        apply_rmap = libcommunity.vector_rmap
    apply_rmap(keys, slots)

    # Write the filled array back into the property map.
    if target_is_pmap:
        value_map.fa = slots

def continuous_map(prop):
    """Remap the values of ``prop`` in the continuous range :math:`[0, N-1]`."""
    prop_is_pmap = isinstance(prop, PropertyMap)
    labels = prop.fa if prop_is_pmap else prop

    if labels.max() < len(labels):
        # All values are valid indices into an array of the same length, so a
        # map filled with -1 is handed to the generic mapping routine.
        # NOTE(review): presumably the native routine treats -1 as "not yet
        # assigned" -- confirm.
        unset_map = -ones(len(labels), dtype=labels.dtype)
        if labels.dtype == "int64":
            apply_map = libcommunity.vector_map64
        else:
            apply_map = libcommunity.vector_map
        apply_map(labels, unset_map)
    else:
        if labels.dtype == "int64":
            relabel = libcommunity.vector_continuous_map64
        else:
            relabel = libcommunity.vector_continuous_map
        relabel(labels)

    # Write the remapped array back into the property map.
    if prop_is_pmap:
        prop.fa = labels

def greedy_shrink(state, B, **kwargs):
    """Return a copy of ``state`` whose blocks have been merged down to ``B``.

    The input ``state`` is copied first; the returned state is built with
    ``state.copy(b=..., B=B)`` from the merged labels.

    Keyword arguments read here: ``nmerge_sweeps`` (number of sweeps per merge
    round; if missing or ``None`` it defaults to ``max(2 * E // N, 1)``),
    ``verbose`` and ``random_move``. A ``beta`` entry is discarded, and ``c``
    and ``dl`` are overwritten. The resulting keyword arguments are forwarded
    to :func:`mcmc_sweep` and, for non-overlapping states, to
    :func:`unilevel_minimize`.

    Raises :exc:`ValueError` if ``B`` is larger than ``state.B``.
    """
    if B > state.B:
        raise ValueError("Cannot shrink to a larger size!")

    kwargs = kwargs.copy()
    if kwargs.get("nmerge_sweeps", None) is None:
        kwargs["nmerge_sweeps"] = max((2 * state.g.num_edges()) // state.g.num_vertices(), 1)
    # `beta` is always passed explicitly to mcmc_sweep below.
    if "beta" in kwargs:
        del kwargs["beta"]

    verbose = kwargs.get("verbose", False)

    orig_state = state
    state = state.copy(B=state.B)

    # merge according to indirect neighbourhood; we put all group-nodes in their
    # own groups, and merge/move them until the desired size is reached
    curr_B = (state.wr.fa > 0).sum()
    # NOTE(review): this requires strictly more non-empty blocks than `B`,
    # although `B == state.B` passes the check at the top -- confirm that
    # callers never request the current size.
    assert curr_B > B, "shrinking to a larger size ?! (%d, %d, %d)" % (state.B, curr_B, B)

    # NOTE: this local shadows the module-level name `random` inside this
    # function.
    random = kwargs.get("random_move", False)
    # NOTE(review): `old_state` is not referenced again in this function;
    # presumably it keeps the copied state alive while the derived block
    # state is in use -- confirm before removing.
    old_state = state
    if not state.overlap:
        state, n_map = state.get_block_state(vweight=True,
                                             deg_corr=state.deg_corr)
    merge_map = state.g.vertex_index.copy("int")

    if _bm_test():
        assert curr_B == (state.wr.a > 0).sum(), (curr_B, (state.wr.a > 0).sum())

    unweighted = False
    kwargs["c"] = 0 if not random else numpy.inf
    kwargs["dl"] = False
    # Keep requesting the remaining number of merges until the target number
    # of non-empty blocks is reached.
    while curr_B > B:
        dS, nmoves = mcmc_sweep(state, beta=numpy.inf,
                                niter=kwargs["nmerge_sweeps"],
                                nmerges=curr_B - B,
                                merge_map=merge_map,
                                unweighted_merge=unweighted,
                                **kwargs)

        curr_B = (state.wr.a > 0).sum()

        if _bm_test():
            assert curr_B == len(set(state.b.fa)), (curr_B, len(set(state.b.fa)))

        if verbose:
            print("merging, B=%d" % curr_B, "left:", curr_B - B,
                  "(%g, %d%s%s)" % (dS, nmoves, ", random" if random else "",
                                    ", unweighted" if unweighted else ""))

        # No merge was accepted: first fall back to unweighted merges, and if
        # that was already enabled, to fully random moves (c = inf).
        if nmoves == 0:
            if not unweighted:
                unweighted = True
            else:
                kwargs["c"] = numpy.inf
                random = True

    if _bm_test():
        assert curr_B == (state.wr.a > 0).sum(), (curr_B, (state.wr.a > 0).sum())

    # Translate the merged labels back to the vertices of the original graph,
    # make them contiguous, and build the final state from them.
    if not state.overlap:
        unilevel_minimize(state, **kwargs)  # block level moves
        pmap(merge_map, state.b)
        pmap(n_map, merge_map)
        continuous_map(n_map)
        state = orig_state.copy(b=n_map, B=B)
    else:
        pmap(merge_map, state.b)
        continuous_map(merge_map)
        state = orig_state.copy(b=merge_map, B=B)


    if _bm_test():
        assert state._BlockState__check_clabel(), "clabel already invalidated!"
        assert curr_B == (state.wr.a > 0).sum(), (curr_B, (state.wr.a > 0).sum(), len(state.wr.a), state.B)
        curr_B = (state.wr.a > 0).sum()
        assert state.B == curr_B, (state.B, curr_B)
        assert state.B == B, (state.B, B)

    return state


def unilevel_minimize(state, nsweeps=10, adaptive_sweeps=True, epsilon=0,
                      anneal=(1., 1.), greedy=True, c=0., dl=False, dense=False,
                      multigraph=True, sequential=True, parallel=False,
                      verbose=False, **kwargs):
    r"""Run repeated :func:`mcmc_sweep` calls on ``state`` without merging blocks.

    Parameters
    ----------
    state : block state
        Object handed to :func:`mcmc_sweep`. This function additionally uses
        its ``entropy()``, ``clear_cache()``, ``B``, ``E``, ``g`` and
        ``overlap`` members.
    nsweeps : ``int`` (optional, default: ``10``)
        In non-adaptive mode, the ``niter`` value of each sweep call. In
        adaptive mode, the number of consecutive sweeps without progress that
        ends a phase.
    adaptive_sweeps : ``bool`` (optional, default: ``True``)
        If ``True``, keep sweeping until no progress is detected; otherwise
        perform a fixed amount of sweeps.
    epsilon : ``float`` (optional, default: ``0``)
        In the final adaptive phase, a batch only counts as progress if the
        fraction ``nmoves / (nsweeps * N)`` exceeds this value.
    anneal : pair of ``floats`` (optional, default: ``(1., 1.)``)
        Starting ``beta`` and the factor it is multiplied by after each
        estimated equilibration. Only used if ``greedy`` is ``False``.
    greedy : ``bool`` (optional, default: ``True``)
        If ``True``, only sweeps with ``beta = inf`` are performed.
    c, dl, dense, multigraph, sequential, parallel
        Forwarded unchanged to :func:`mcmc_sweep`.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, progress information is printed.
    **kwargs
        Remaining keyword arguments, forwarded to :func:`mcmc_sweep`.

    Returns
    -------
    dS : ``float``
        Sum of the entropy differences reported by all sweeps.
    nmoves : ``int``
        Sum of the move counts reported by all sweeps.
    """
    kwargs = kwargs.copy()
    kwargs.update(dict(c=c, dl=dl, dense=dense, multigraph=multigraph,
                       sequential=sequential, parallel=parallel))

    t_dS, t_nmoves = 0, 0

    S = state.entropy()

    if not adaptive_sweeps:
        if not greedy:
            # Single finite-temperature pass at the initial annealing beta.
            beta = anneal[0]
            if verbose:
                print("Performing sweeps for beta = %g, B=%d (N=%d)..." % \
                      (beta, state.B, state.g.num_vertices()))
            delta, nmoves = mcmc_sweep(state, beta=beta, niter=nsweeps,
                                       **kwargs)
            t_dS += delta
            t_nmoves += nmoves

            if verbose:
                print("... performed %d sweeps with %d vertex moves" % (nsweeps, nmoves))

        if verbose:
            print("Performing sweeps for beta = ∞, B=%d (N=%d)..." % \
                  (state.B, state.g.num_vertices()))

        delta, nmoves = mcmc_sweep(state, beta=numpy.inf, niter=nsweeps,
                                   **kwargs)
        if state.overlap:
            ds, nm = mcmc_sweep(state, niter=nsweeps, beta=numpy.inf,
                                node_coherent=True, **kwargs)
            delta += ds
            nmoves += nm
        # The totals must include these sweeps, otherwise the returned dS is
        # always zero in non-adaptive mode.
        t_dS += delta
        t_nmoves += nmoves

        if verbose:
            print("... performed %d sweeps with %d vertex moves" % (nsweeps, nmoves))
    else:
        # adaptive mode
        min_dl = max_dl = last_min = S
        count = 0
        bump = False
        beta = anneal[0]

        if verbose and not greedy:
            print("Performing sweeps for beta = %g, B=%d (N=%d)..." % \
                   (beta, state.B, state.g.num_vertices()))

        # Entropy changes smaller than this are treated as noise.
        eps = 1e-8 * state.E
        niter = 0
        total_nmoves = 0

        # Finite-temperature phase (skipped entirely when greedy): sweep until
        # the running entropy stays inside [min_dl, max_dl] for more than
        # ``nsweeps`` consecutive sweeps.
        while not greedy:
            if count > nsweeps:
                if not bump:
                    # First stall: reset the window once and try again.
                    min_dl = max_dl = S
                    bump = True
                    count = 0
                elif anneal[1] <= 1 or min_dl == last_min:
                    # No cooling requested, or cooling no longer lowers the
                    # minimum.
                    break
                else:
                    beta *= anneal[1]
                    count = 0
                    last_min = min_dl
                    if verbose:
                        print("Performing sweeps for beta = %g, B=%d (N=%d)..." % \
                               (beta, state.B, state.g.num_vertices()))

            delta, nmoves = mcmc_sweep(state, beta=beta, **kwargs)

            if state.overlap and beta == numpy.inf:
                ds, nm = mcmc_sweep(state, beta=beta, node_coherent=True, **kwargs)
                delta += ds
                nmoves += nm

            S += delta
            niter += 1
            total_nmoves += nmoves

            t_dS += delta
            t_nmoves += nmoves

            if S > max_dl + eps:
                max_dl = S
                count = 0
            elif S < min_dl - eps:
                min_dl = S
                count = 0
            else:
                count += 1

        if verbose:
            if not greedy:
                print("... performed %d sweeps with %d vertex moves" % (niter, total_nmoves))
            print("Performing sweeps for beta = ∞, B=%d (N=%d)..." % \
                  (state.B, state.g.num_vertices()))

        # Greedy phase: batches of ``nsweeps`` sweeps at beta = inf until a
        # batch no longer makes progress.
        count = 0
        niter = 0
        total_nmoves = 0
        deltaS = 0
        N = state.g.num_vertices()
        while count <= nsweeps:
            delta, nmoves = mcmc_sweep(state, niter=nsweeps, beta=numpy.inf,
                                       **kwargs)

            if state.overlap:
                ds, nm = mcmc_sweep(state, niter=nsweeps, beta=numpy.inf,
                                    node_coherent=True, **kwargs)
                delta += ds
                nmoves += nm

            deltaS += delta
            niter += nsweeps
            total_nmoves += nmoves

            t_dS += delta
            t_nmoves += nmoves

            if abs(delta / nsweeps) > eps and nmoves / (nsweeps * N) > epsilon:
                count = 0
            else:
                count += nsweeps

        if verbose:
            print("... performed %d sweeps with %d vertex moves (dS = %g)" % (niter, total_nmoves, deltaS))

    state.clear_cache()

    return t_dS, t_nmoves


def multilevel_minimize(state, B, nsweeps=10, adaptive_sweeps=True, epsilon=0,
                        anneal=(1., 1.), r=2., nmerge_sweeps=10, greedy=True,
                        c=0., dl=False, dense=False, multigraph=True,
                        sequential=True, parallel=False, verbose=False,
                        **kwargs):
    r"""Performs an agglomerative heuristic, which progressively merges blocks together (while allowing individual node moves) to achieve a good partition in ``B`` blocks.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`, :class:`~graph_tool.community.OverlapBlockState` or :class:`~graph_tool.community.CovariateBlockState`
        The block state.
    B : ``int``
        The desired number of blocks.
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    adaptive_sweeps : ``bool`` (optional, default: ``True``)
        If ``True``, the number of sweeps necessary for the local minimum will
        be estimated to be enough so that no more than ``epsilon * N`` nodes
        change their states in the last ``nsweeps`` sweeps.
    epsilon : ``float`` (optional, default: ``0``)
        Convergence criterion for ``adaptive_sweeps``.
    anneal : pair of ``floats`` (optional, default: ``(1., 1.)``)
        The first value specifies the starting value for  ``beta`` of the MCMC
        steps, and the second value is the factor which is multiplied to ``beta``
        after each estimated equilibration (according to ``nsweeps`` and
        ``adaptive_sweeps``).
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : ``int`` (optional, default: ``10``)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    greedy : ``bool`` (optional, default: ``True``)
        If ``True``, the value of ``beta`` of the MCMC steps are kept at
        infinity for all steps. Otherwise they change according to the ``anneal``
        parameter.
    c : ``float`` (optional, default: ``0.0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    dl : ``bool`` (optional, default: ``False``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    multigraph : ``bool`` (optional, default: ``True``)
        If ``True``, the multigraph entropy will be used. Only has an effect
        if ``dense == True``.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    parallel : ``bool`` (optional, default: ``False``)
        If ``True``, the updates are performed in parallel (multiple
        threads).
    vertices: ``list of ints`` (optional, default: ``None``)
        A list of vertices which will be attempted to be moved. If ``None``, all
        vertices will be attempted.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------

    state : :class:`~graph_tool.community.BlockState`
        The new :class:`~graph_tool.community.BlockState` with ``B`` blocks.

    Notes
    -----

    This algorithm performs an agglomerative heuristic on the current block state,
    where blocks are progressively merged together, using repeated applications of
    the :func:`mcmc_sweep` moves, at different scales. See [peixoto-efficient-2014]_
    for more details.

    This algorithm has a complexity of :math:`O(V\ln^2 V)`, where :math:`V` is the
    number of nodes in the network.

    Examples
    --------
    .. testsetup:: multilevel_minimize

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: multilevel_minimize

       >>> g = gt.collection.data["polblogs"]
       >>> g = gt.GraphView(g, vfilt=gt.label_largest_component(gt.GraphView(g, directed=False)))
       >>> state = gt.BlockState(g, B=g.num_vertices(), deg_corr=True)
       >>> state = gt.multilevel_minimize(state, B=2)
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=state.get_blocks(), output="polblogs_agg.pdf")
       <...>

    .. testcleanup:: multilevel_minimize

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=state.get_blocks(), output="polblogs_agg.png")

    .. figure:: polblogs_agg.*
       :align: center

       Block partition of a political blogs network with :math:`B=2`.

    References
    ----------

    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
    """

    # Optional cache of earlier results, keyed by number of blocks; each entry
    # is indexed below as ``entry[1]`` to obtain the stored state.
    b_cache = kwargs.get("b_cache", {})

    # NOTE(review): ``anneal`` and ``greedy`` are not part of ``nkwargs``, so
    # the unilevel_minimize() calls below run with that function's own
    # defaults for them -- confirm whether this is intended.
    nkwargs = dict(nsweeps=nsweeps, epsilon=epsilon, c=c, dl=dl, dense=dense,
                   multigraph=multigraph, nmerge_sweeps=nmerge_sweeps,
                   adaptive_sweeps=adaptive_sweeps, sequential=sequential,
                   parallel=parallel)
    kwargs = copy.copy(kwargs)
    kwargs.update(nkwargs)

    # ``nonoverlap_compare`` is stripped from the forwarded arguments and must
    # be false; the branch further down that reads it is therefore inactive
    # whenever assertions are enabled.
    nonoverlap_compare = kwargs.get("nonoverlap_compare", False)
    if "nonoverlap_compare" in kwargs:
        del kwargs["nonoverlap_compare"]
    assert not nonoverlap_compare, "don't do this"

    orig_state = state

    if _bm_test():
        assert state._BlockState__check_clabel(), "orig clabel already invalidated!"

    # some simple boundary conditions
    if B == 1:
        # A single block is only possible if all constraint labels are zero.
        if state.clabel.fa.max() > 0:
            raise ValueError("Cannot shrink to B = 1 without invalidating constraints")
        bi = state.g.new_vertex_property("int")
        state = state.copy(b=bi)
        return state
    if B == state.g.num_vertices():
        # One block per vertex: use the vertex index as block label.
        bi = state.g.vertex_index.copy("int")
        state = state.copy(b=bi)
        return state

    Bi = state.B
    while True:
        # keep reducing B by a factor of "r", until desired size is reached
        Bi = max(int(round(Bi / r)), B)
        # Guarantee progress when rounding would leave the size unchanged.
        if Bi == state.B and Bi > B:
            Bi -= 1

        # check cache for previous results
        if b_cache is not None and Bi in b_cache:
            if _bm_test():
                assert (state.clabel.fa == b_cache[Bi][1].clabel.fa).all(), "wrong clabel in cache"
                assert state._BlockState__check_clabel(), "clabel already invalidated before cache"
                assert b_cache[Bi][1]._BlockState__check_clabel(), "clabel already invalidated after cache"
            state = b_cache[Bi][1].copy()

        if _bm_test():
            assert state._BlockState__check_clabel(), "clabel already invalidated!"

        # if necessary, shrink state
        if Bi < state.B:
            if verbose:
                print("Shrinking:", state.B, "->", Bi)

            state = greedy_shrink(state, B=Bi, verbose=verbose, **kwargs)

            if _bm_test():
                assert state._BlockState__check_clabel(), "clabel invalidated after shrink"

        if verbose:
            print("Minimizing for:", state.B)

        # Refine the partition at the current number of blocks; the returned
        # totals are not used here.
        dS, nmoves = unilevel_minimize(state, verbose=verbose, **kwargs)

        if _bm_test():
            assert state._BlockState__check_clabel(), "clabel invalidated after unilevel minimize!"

        if state.overlap and state.deg_corr and nonoverlap_compare:
            if verbose:
                print("Attempting nonoverlapping minimize...")
            nstate = state.copy(b=state.get_nonoverlap_blocks(), overlap=False)
            assert nstate.B <= nstate.N
            nstate = multilevel_minimize(nstate, B=Bi, verbose=verbose, **kwargs)
            nstate = nstate.copy(overlap=True, clabel=state.clabel.fa)
            unilevel_minimize(nstate, **kwargs)

            if nstate.B > Bi:
                nstate = multilevel_minimize(nstate, B=Bi, verbose=verbose,
                                             nonoverlap_compare=False, **kwargs)

            if nstate.entropy(dense=dense, multigraph=multigraph) < state.entropy(dense=dense, multigraph=multigraph):
                if verbose:
                    print("Nonoverlapping minimize improved.")
                state = nstate

                if _bm_test():
                    assert state._BlockState__check_clabel(), "clabel invalidated after nonoverlap compare!"

        # Stop once the requested number of blocks has been processed.
        if Bi == B:
            break

    return state

def get_b_dl(state, dense, multigraph, nested_dl, complete=False,
             nested_overlap=False, dl_ent=False):
    """Return the description length of ``state``.

    Without ``nested_dl`` this is ``state.entropy(..., dl=True)``. With
    ``nested_dl`` the entropy is requested with ``edges_dl=False`` and the
    entropy of the block state built from ``state.get_bclabel()`` (dense,
    multigraph, also with ``edges_dl=False``) is added on top.
    """
    if not nested_dl:
        return state.entropy(dense=dense, multigraph=multigraph, dl=True,
                             complete=complete, dl_ent=dl_ent)

    total = state.entropy(dense=dense, multigraph=multigraph, dl=True,
                          edges_dl=False, complete=complete, dl_ent=dl_ent)

    # Block state one level up, built from the block constraint labels.
    block_labels = state.get_bclabel()
    upper = state.get_block_state(b=block_labels, overlap=nested_overlap)[0]

    total += upper.entropy(dl=True, edges_dl=False, dense=True,
                           multigraph=True, dl_ent=dl_ent)
    return total


def get_state_dl(B, b_cache, sparse_heuristic, **kwargs):
    """Return the description length for ``B`` blocks, filling ``b_cache``.

    Parameters
    ----------
    B : ``int``
        Number of blocks to evaluate.
    b_cache : ``dict``
        Cache mapping a number of blocks to ``[dl, state]``. String keys are
        ignored when searching for a starting point. A new entry for ``B`` is
        stored here when it has to be computed.
    sparse_heuristic : ``bool``
        If ``True``, the minimization is run with ``dense=False`` and
        ``multigraph=False``, while the description length is still evaluated
        with the values given in ``kwargs``.
    **kwargs
        Forwarded to :func:`multilevel_minimize`. The keys ``dense``,
        ``multigraph``, ``nested_dl``, ``complete``, ``nested_overlap`` and
        ``dl_ent`` are also used for the description length, ``verbose``
        toggles progress output, and ``shrink`` must be present and true
        whenever ``B`` is not cached.

    Returns
    -------
    dl : ``float``
        The cached value if ``B`` is already in ``b_cache``, otherwise the
        value of the newly minimized state.

    Raises
    ------
    RuntimeError
        If ``B`` is not cached and no cached state with more blocks exists
        (or ``shrink`` is false).
    """
    bs = b_cache
    verbose = kwargs.get("verbose", False)

    def _dl(s):
        # Description length of ``s`` under the entropy options in kwargs.
        return get_b_dl(s,
                        kwargs.get("dense", False),
                        kwargs.get("multigraph", False),
                        kwargs.get("nested_dl", False),
                        kwargs.get("complete", False),
                        kwargs.get("nested_overlap", False),
                        kwargs.get("dl_ent", False))

    if B in bs:
        # A cached result is always treated as final.
        if verbose:
            print("(using previous finished result for B=%d)" % B)
        if _bm_test():
            dl = _dl(bs[B][1])
            assert abs(dl - bs[B][0]) < bs[B][1].E * 1e-8, "inconsistent DL values! (%g, %g, overlap: %s)" % (dl, bs[B][0], str(bs[B][1].overlap))
        return bs[B][0]

    # No previous result is available: start from the cached state with the
    # smallest number of blocks above B and shrink it.
    larger = [k for k in bs.keys() if not isinstance(k, str) and k > B]
    B_sup = min(larger) if larger else B
    if B_sup == B or not kwargs["shrink"]:
        raise RuntimeError("should not happen! B=%d, B_sup=%d, %s" % (B, B_sup, str(bs)))

    if verbose:
        print("(shrinking from B=%d to B=%d)" % (B_sup, B))
    state = bs[B_sup][1].copy()
    if _bm_test():
        assert state._BlockState__check_clabel(), "larger B clabel already invalidated!"

    # perform the actual minimization
    args = kwargs.copy()
    args["b_cache"] = bs
    if sparse_heuristic:
        args["dense"] = False
        args["multigraph"] = False

    state = multilevel_minimize(state, B, **args)

    if _bm_test():
        assert state._BlockState__check_clabel(), "clabel invalidated after minimize"
        assert state.B == B

    dl = _dl(state)

    if _bm_test():
        assert state._BlockState__check_clabel(), "clabel invalidated after minimize (?!)"

    bs[B] = [dl, state]
    if verbose:
        print("(using new result for B=%d with L=%g)" % (B, dl))

    if _bm_test():
        assert not isinf(dl)
    return dl

def fibo(n):
    """Return the ``n``-th Fibonacci number using the closed-form expression
    ``round(phi ** n / sqrt(5))``, with ``phi`` the golden ratio."""
    root5 = sqrt(5)
    golden = (1 + root5) / 2
    return int(round(golden ** n / root5))

def fibo_n_floor(x):
    """Return ``floor(log(x * sqrt(5) + 0.5) / log(phi))`` as an ``int``.

    For integers ``x >= 1`` this is the index of the largest Fibonacci number
    not exceeding ``x`` (``x == 1`` gives index 2).
    """
    root5 = sqrt(5)
    golden = (1 + root5) / 2
    index = log(x * root5 + 0.5) / log(golden)
    return int(floor(index))

def get_mid(a, b, random=False):
    """Pick a probe point inside the interval ``[a, b]``.

    With ``random`` set, the point is ``a`` plus a uniform integer offset in
    ``[0, b - a]``. Otherwise it is ``b - fibo(n - 1)``, where ``n`` is
    ``fibo_n_floor(b - a)``.
    """
    if not random:
        k = fibo_n_floor(b - a)
        return b - fibo(k - 1)

    offset = numpy.random.randint(b - a + 1)
    return a + offset

def is_fibo(x):
    """Return whether ``x`` equals ``fibo(fibo_n_floor(x))``, i.e. whether it
    is reproduced exactly by the Fibonacci number at its floor index."""
    index = fibo_n_floor(x)
    candidate = fibo(index)
    return candidate == x


def minimize_blockmodel_dl(g, deg_corr=True, overlap=False, ec=None,
                           layers=False, confine_layers=False,
                           nonoverlap_init=False, dl=True, multigraph=True,
                           dense=False, sparse_heuristic=False, eweight=None,
                           vweight=None, clabel=None, c=0, nsweeps=10,
                           adaptive_sweeps=True, epsilon=1e-3, anneal=(1., 1.),
                           greedy_cooling=True, sequential=True, parallel=False,
                           r=2, nmerge_sweeps=10, max_B=None, min_B=None,
                           mid_B=None, random_bisection=False, exhaustive=False,
                           init_states=None, max_BE=None, verbose=False,
                           **kwargs):
    r"""Find the block partition of an unspecified size which minimizes the description
    length of the network, according to the stochastic blockmodel ensemble which
    best describes it.

    Parameters
    ----------
    g : :class:`~graph_tool.Graph`
        Graph to be modelled.
    deg_corr : ``bool`` (optional, default: ``True``)
        If ``True``, the degree-corrected version of the blockmodel ensemble will
        be assumed, otherwise the traditional variant will be used.
    overlap : ``bool`` (optional, default: ``False``)
        If ``True``, the mixed-membership version of the blockmodel will be used.
    ec : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        If provided, this should be an edge :class:`~graph_tool.PropertyMap`
        containing edge covariates that will split the network in discrete
        layers.
    layers : ``bool`` (optional, default: ``False``)
        If ``True``, and ``ec`` is not ``None``, the "independent layers"
        version of the model is used, instead of the "edge covariates" version.
    confine_layers : ``bool`` (optional, default: ``False``)
        If ``True``, and ``ec`` is not ``None`` and ``overlap == True``, the
        half edges will only be moved in such a way that inside each layer the
        group membership remains non-overlapping.
    nonoverlap_init : ``bool`` (optional, default: ``False``)
        If ``True``, and ``overlap == True``, the minimization starts by first
        fitting the non-overlapping model, and using that as a starting state.
    dl : ``bool`` (optional, default: ``True``)
        If ``True``, the change in the whole description length will be
        considered after each vertex move, not only the entropy.
    multigraph : ``bool`` (optional, default: ``True``)
        If ``True``, the multigraph entropy will be used.
    dense : ``bool`` (optional, default: ``False``)
        If ``True``, the "dense" variant of the entropy will be computed.
    sparse_heuristic : ``bool`` (optional, default: ``False``)
        If ``True``, the sparse entropy will be used to find the best partition,
        but the dense entropy will be used to compare different partitions. This
        has an effect only if ``dense == True``.
    eweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge multiplicities (for multigraphs or block graphs).
    vweight : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex multiplicities (for block graphs).
    clabel : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Constraint labels on the vertices. If supplied, vertices with different
        label values will not be clustered in the same group.
    c : ``float`` (optional, default: ``0``)
        This parameter specifies how often fully random moves are attempted,
        instead of more likely moves based on the inferred block partition.
        For ``c == 0``, no fully random moves are attempted, and for ``c == inf``
        they are always attempted.
    nsweeps : ``int`` (optional, default: ``10``)
        The number of sweeps done after each merge step to reach the local
        minimum.
    adaptive_sweeps : ``bool`` (optional, default: ``True``)
        If ``True``, the number of sweeps necessary for the local minimum will
        be estimated to be enough so that no more than ``epsilon * N`` nodes
        changes their states in the last ``nsweeps`` sweeps.
    epsilon : ``float`` (optional, default: ``1e-3``)
        Converge criterion for ``adaptive_sweeps``.
    anneal : pair of ``floats`` (optional, default: ``(1., 1.)``)
        The first value specifies the starting value for  ``beta`` of the MCMC
        steps, and the second value is the factor which is multiplied to ``beta``
        after each estimated equilibration (according to ``nsweeps`` and
        ``adaptive_sweeps``).
    greedy_cooling : ``bool`` (optional, default: ``True``)
        If ``True``, the value of ``beta`` of the MCMC steps are kept at
        infinity for all steps. Otherwise they change according to the ``anneal``
        parameter.
    sequential : ``bool`` (optional, default: ``True``)
        If ``True``, the move attempts on the vertices are done in sequential
        random order. Otherwise a total of `N` moves attempts are made, where
        `N` is the number of vertices, where each vertex can be selected with
        equal probability.
    parallel : ``bool`` (optional, default: ``False``)
        If ``True``, the updates are performed in parallel (multiple
        threads).
    r : ``float`` (optional, default: ``2.``)
        Agglomeration ratio for the merging steps. Each merge step will attempt
        to find the best partition into :math:`B_{i-1} / r` blocks, where
        :math:`B_{i-1}` is the number of blocks in the last step.
    nmerge_sweeps : `int` (optional, default: `10`)
        The number of merge sweeps done, where in each sweep a better merge
        candidate is searched for every block.
    max_B : ``int`` (optional, default: ``None``)
        Maximum number of blocks tried. If not supplied, it will be
        automatically determined.
    min_B : ``int`` (optional, default: `1`)
        Minimum number of blocks tried.
    mid_B : ``int`` (optional, default: ``None``)
        Middle of the range which brackets the minimum. If not supplied, will be
        automatically determined.
    random_bisection : ``bool`` (optional, default: ``False``)
        If ``True``, the best value of ``B`` will be found by performing a
        random bisection, instead of a Fibonacci search.
    exhaustive : ``bool`` (optional, default: ``False``)
        If ``True``, the best value of ``B`` will be found by testing all possible
        values, instead of performing a bisection search.
    init_states : ``list`` of :class:`~graph_tool.community.BlockState` or :class:`~graph_tool.community.OverlapBlockState` (optional, default: ``None``)
        If provided, this list of block states will be used when performing the
        minimization.
    max_BE : ``int`` (optional, default: ``1000``)
        If the number of blocks exceeds this number, a sparse representation of
        the block graph is used, which is slightly less efficient, but uses less
        memory.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, verbose information is displayed.

    Returns
    -------
    state : :class:`~graph_tool.community.BlockState` or :class:`~graph_tool.community.OverlapBlockState`
       The block state containing the best model fit.

    Notes
    -----

    This algorithm attempts to find a block partition of an unspecified size
    which minimizes the description length of the network,

    .. math::

       \Sigma_{t/c} = \mathcal{S}_{t/c} + \mathcal{L}_{t/c},

    where :math:`\mathcal{S}_{t/c}` is the blockmodel entropy (as described in
    the docstring of :func:`mcmc_sweep`, :meth:`BlockState.entropy`
    :meth:`OverlapBlockState.entropy`) and :math:`\mathcal{L}_{t/c}` is the
    information necessary to describe the model (as described in the docstring
    of :func:`model_entropy`, :meth:`BlockState.entropy` and
    :meth:`OverlapBlockState.entropy`).

    The algorithm works by minimizing the entropy :math:`\mathcal{S}_{t/c}` for
    specific values of :math:`B` via :func:`mcmc_sweep` (with :math:`\beta = 1`
    and :math:`\beta\to\infty`), and minimizing :math:`\Sigma_{t/c}` via an
    one-dimensional Fibonacci search on :math:`B`. See
    [peixoto-parsimonious-2013]_ and [peixoto-efficient-2014]_ for more details.

    This algorithm has a complexity of :math:`O(\tau V\ln^2 B_{\text{max}})`,
    where :math:`V` is the number of nodes in the network, :math:`\tau` is the
    mixing time of the MCMC, and :math:`B_{\text{max}}` is the maximum number of
    blocks considered. If :math:`B_{\text{max}}` is not supplied, it is computed
    as :math:`\sim\sqrt{E}` via :func:`get_max_B`, in which case the complexity
    becomes :math:`O(\tau E\ln E)`.


    Examples
    --------
    .. testsetup:: mdl

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: mdl

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.minimize_blockmodel_dl(g)
       >>> b = state.b
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_mdl.pdf")
       <...>

    .. testcleanup:: mdl

       gt.graph_draw(g, pos=g.vp["pos"], vertex_fill_color=b, vertex_shape=b, output="polbooks_blocks_mdl.png")

    .. figure:: polbooks_blocks_mdl.*
       :align: center

       Block partition of a political books network, which minimizes the description
       length of the network according to the degree-corrected stochastic blockmodel.


    .. testsetup:: mdl_overlap

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: mdl_overlap

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.minimize_blockmodel_dl(g, overlap=True)
       >>> ret = state.get_overlap_blocks()
       >>> bv, bc = ret[0], ret[-1]
       >>> eg = gt.get_block_edge_gradient(g, state.get_edge_blocks())
       >>> gt.graph_draw(g, g.vp["pos"], vertex_pie_fractions=bc,
       ...               vertex_pie_colors=bv, vertex_shape="pie",
       ...               edge_gradient=eg,
       ...               output="polbooks_overlap_blocks_mdl.pdf")
       <...>

    .. testcleanup:: mdl_overlap

       gt.graph_draw(g, g.vp["pos"], vertex_pie_fractions=bc,
                     vertex_pie_colors=bv, vertex_shape="pie",
                     edge_gradient=eg,
                     output="polbooks_overlap_blocks_mdl.png")

    .. figure:: polbooks_overlap_blocks_mdl.*
       :align: center

       Overlapping partition of a political books network, which minimizes the
       description length of the network according to the overlapping
       degree-corrected stochastic blockmodel.

    References
    ----------

    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey,
       Samuel Leinhardt, "Stochastic blockmodels: First steps",
       Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A., :doi:`10.1016/0378-8733(83)90021-7`
    .. [faust-blockmodels-1992] Katherine Faust, and Stanley
       Wasserman. "Blockmodels: Interpretation and Evaluation." Social Networks
       14, no. 1-2 (1992): 5-61. :doi:`10.1016/0378-8733(92)90013-W`
    .. [karrer-stochastic-2011] Brian Karrer, and M. E. J. Newman. "Stochastic
       Blockmodels and Community Structure in Networks." Physical Review E 83,
       no. 1 (2011): 016107. :doi:`10.1103/PhysRevE.83.016107`.
    .. [peixoto-entropy-2012] Tiago P. Peixoto "Entropy of Stochastic Blockmodel
       Ensembles." Physical Review E 85, no. 5 (2012): 056122. :doi:`10.1103/PhysRevE.85.056122`,
       :arxiv:`1112.6028`.
    .. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module inference in large networks",
       Phys. Rev. Lett. 110, 148701 (2013), :doi:`10.1103/PhysRevLett.110.148701`, :arxiv:`1212.4794`.
    .. [peixoto-efficient-2014] Tiago P. Peixoto, "Efficient Monte Carlo and greedy
       heuristic for the inference of stochastic block models", Phys. Rev. E 89, 012804 (2014),
       :doi:`10.1103/PhysRevE.89.012804`, :arxiv:`1310.4378`.
    .. [peixoto-model-2016] Tiago P. Peixoto, "Model selection and hypothesis
       testing for large-scale network models with overlapping groups",
       Phys. Rev. X 5, 011033 (2016), :doi:`10.1103/PhysRevX.5.011033`,
       :arxiv:`1409.3059`.
    .. [peixoto-inferring-2016] Tiago P. Peixoto, "Inferring the mesoscale
       structure of layered, edge-valued and time-varying networks",
       :arXiv:`1504.02381`
    """

    nested_dl = kwargs.get("nested_dl", False)
    nested_overlap = kwargs.get("nested_overlap", False)
    nonoverlap_compare = kwargs.get("nonoverlap_compare", False)
    dl_ent = kwargs.get("dl_ent", False)
    ignore_degrees = kwargs.get("ignore_degrees", None)

    b_cache = {}

    if overlap and nonoverlap_init:
        if verbose:
            print("Non-overlapping initialization...")
        state = minimize_blockmodel_dl(g=g, ec=ec,
                                       layers=layers,
                                       eweight=eweight, vweight=vweight,
                                       deg_corr=deg_corr, dl=dl, dense=dense,
                                       multigraph=multigraph,
                                       sparse_heuristic=sparse_heuristic, c=c,
                                       nsweeps=nsweeps,
                                       adaptive_sweeps=adaptive_sweeps,
                                       epsilon=epsilon, anneal=anneal,
                                       greedy_cooling=greedy_cooling,
                                       sequential=sequential, parallel=parallel,
                                       r=r, nmerge_sweeps=nmerge_sweeps,
                                       max_B=max_B, min_B=min_B, mid_B=mid_B,
                                       clabel=clabel if isinstance(clabel, PropertyMap) else None,
                                       exhaustive=exhaustive, max_BE=max_BE,
                                       nested_dl=nested_dl, overlap=False,
                                       init_states=None, dl_ent=dl_ent,
                                       verbose=verbose)
        state = state.copy(overlap=True, clabel=clabel)
        unilevel_minimize(state, nsweeps=nsweeps, epsilon=epsilon, c=c, dl=dl,
                          nmerge_sweeps=nmerge_sweeps, sequential=sequential)
        max_B = state.B
        init_states = [state]

        if min_B is None:
            min_B = state.clabel.fa.max() + 1

        if verbose:
            print("Overlapping minimization starting from B=", max_B)

    if min_B is None:
        if clabel is None:
            min_B = 1
        elif isinstance(clabel, PropertyMap):
            min_B = clabel.fa.max() + 1
        else:
            min_B = clabel.max() + 1
    elif clabel is not None:
        C = clabel.fa.max() + 1 if isinstance(clabel, PropertyMap) else clabel.max() + 1
        if C > min_B:
            raise ValueError("value of min_B=%d is not consistent with the enforced constraints of size %d" % (min_B, C))

    if max_B is None:
        if dense:
            max_B = max(g.num_vertices(), 1)
        else:
            max_B = get_max_B(g.num_vertices(), g.num_edges(), g.is_directed())
        if verbose:
            print("max_B:", max_B)

    if mid_B is None:
        mid_B = get_mid(min_B, max_B, random_bisection)

    greedy = greedy_cooling
    shrink = True

    kwargs = dict(nsweeps=nsweeps, adaptive_sweeps=adaptive_sweeps, c=c,
                  sequential=sequential, parallel=parallel, shrink=shrink, r=r,
                  anneal=anneal, greedy=greedy, epsilon=epsilon,
                  nmerge_sweeps=nmerge_sweeps, deg_corr=deg_corr, dense=dense,
                  multigraph=multigraph, dl=dl,
                  sparse_heuristic=sparse_heuristic, nested_dl=nested_dl,
                  nested_overlap=nested_overlap,
                  nonoverlap_compare=nonoverlap_compare, dl_ent=dl_ent,
                  confine_layers=confine_layers, b_cache=b_cache,
                  verbose=verbose)

    if init_states is not None:
        for state in init_states:
            if _bm_test():
                assert state._BlockState__check_clabel(), "init state has invalid clabel!"
            dl = get_b_dl(state,
                          kwargs.get("dense", False),
                          kwargs.get("multigraph", False),
                          kwargs.get("nested_dl", False),
                          kwargs.get("complete", False),
                          kwargs.get("nested_overlap", False),
                          kwargs.get("dl_ent", False))
            b_cache[state.B] = [dl, state]

    B_init = True
    for Bi, bstate in b_cache.items():
        if Bi >= max_B:
            B_init = False

    if B_init:
        if ec is None:
            if overlap:
                state = OverlapBlockState(g, B=2 * g.num_edges(),
                                          deg_corr=deg_corr, vweight=vweight,
                                          eweight=eweight, clabel=clabel,
                                          max_BE=max_BE)
            else:
                state = BlockState(g, B=g.num_vertices(), deg_corr=deg_corr,
                                   vweight=vweight, eweight=eweight,
                                   clabel=clabel, max_BE=max_BE,
                                   ignore_degrees=ignore_degrees)

        else:
            if overlap:
                if confine_layers:
                    be = init_layer_confined(g, ec)
                    B_init = None
                else:
                    be = None
                    B_init = 2 * g.num_edges()
            else:
                be = None
                B_init = g.num_vertices()
            state = CovariateBlockState(g, ec=ec, layers=layers,
                                        B=B_init,
                                        b=be,
                                        deg_corr=deg_corr,
                                        overlap=overlap, vweight=vweight,
                                        eweight=eweight, clabel=clabel,
                                        max_BE=max_BE)
            if overlap and confine_layers and max_B > state.B:
                max_B = state.B

        if _bm_test():
            assert state._BlockState__check_clabel(), "clabel invalid at creation!"

        dl = get_b_dl(state,
                      kwargs.get("dense", False),
                      kwargs.get("multigraph", False),
                      kwargs.get("nested_dl", False),
                      kwargs.get("complete", False),
                      kwargs.get("nested_overlap", False),
                      kwargs.get("dl_ent", False))
        b_cache[state.B] = [dl, state]

    if exhaustive:
        if max_B not in b_cache:
            Bi = max(b_cache.keys())
            state = b_cache[Bi][1]
            state = multilevel_minimize(state, B=max_B, **kwargs)

        for B in reversed(range(min_B, max_B + 1)):
            if B in b_cache:
                state = b_cache[B][1]
                continue

            args = kwargs.copy()
            if sparse_heuristic:
                args["dense"] = False

            state = multilevel_minimize(state, B, **args)

            dl = get_b_dl(state, kwargs.get("dense", False),
                          kwargs.get("multigraph", False),
                          kwargs.get("nested_dl", False),
                          kwargs.get("complete", False),
                          kwargs.get("nested_overlap", False),
                          kwargs.get("dl_ent", False))

            b_cache[B] = [dl, state]

            if verbose:
                print("Result for B=%d: L=%g" % (B, dl))

        min_dl = float(inf)
        best_B = None
        for Bi in b_cache.keys():
            if b_cache[Bi][0] <= min_dl:
                min_dl = b_cache[Bi][0]
                best_B = Bi
        if verbose:
            print("Best result: B=%d, L=%g" % (best_B, min_dl))

        return b_cache[best_B][1]


    def cleanup_cache(b_cache, B_min, B_max):
        best_B = None
        min_dl = numpy.inf
        for Bi in b_cache.keys():
            if b_cache[Bi][0] <= min_dl:
                min_dl = b_cache[Bi][0]
                best_B = Bi

        del_Bs = []

        for Bi in b_cache.keys():
            if (Bi < B_min or Bi > B_max) and Bi != best_B:
                del_Bs.append(Bi)

        for Bi in del_Bs:
            del b_cache[Bi]

    B_lims = (min_B, max_B)

    # Initial bracketing
    while True:
        f_max = get_state_dl(B=max_B, **kwargs)
        f_mid = get_state_dl(B=mid_B, **kwargs)
        f_min = get_state_dl(B=min_B, **kwargs)

        if verbose:
            print("Current bracket:", (min_B, mid_B, max_B), (f_min, f_mid, f_max))

        cleanup_cache(b_cache, min_B, max_B)

        if f_mid > f_min or f_mid > f_max:
            if f_min < f_max:
                max_B = mid_B
                mid_B = get_mid(min_B, mid_B, random_bisection)
            else:
                min_B = mid_B
                mid_B = get_mid(mid_B, max_B, random_bisection)
        else:
            break

        if max_B - mid_B <= 1:
            break

    # Fibonacci search
    while True:
        if max_B - mid_B > mid_B - min_B:
            x = get_mid(mid_B, max_B, random_bisection)
        else:
            x = get_mid(min_B, mid_B, random_bisection)

        f_x = get_state_dl(B=x, **kwargs)
        f_mid = get_state_dl(B=mid_B, **kwargs)

        if verbose:
            print("Current bracket:",
                  (min_B, mid_B, max_B), (get_state_dl(B=min_B, **kwargs), f_mid,
                                          get_state_dl(B=max_B, **kwargs)))
            print("Bisect at", x, "with L=%g" % f_x)

        if max_B - mid_B <= 1:
            min_dl = numpy.inf
            best_B = None
            for Bi in b_cache.keys():
                if Bi < B_lims[0] or Bi > B_lims[1]:
                    continue
                if b_cache[Bi][0] <= min_dl:
                    min_dl = b_cache[Bi][0]
                    best_B = Bi
            if verbose:
                print("Best result: B=%d, L=%g" % (best_B, min_dl))

            return b_cache[best_B][1]

        if f_x < f_mid:
            if max_B - mid_B > mid_B - min_B:
                min_B = mid_B
                mid_B = x
            else:
                max_B = mid_B
                mid_B = x
        else:
            if max_B - mid_B > mid_B - min_B:
                max_B = x
            else:
                min_B = x

        cleanup_cache(b_cache, min_B, max_B)



def collect_edge_marginals(state, p=None):
    r"""Accumulate the edge marginal histogram, i.e. the number of times the
    endpoints of each edge have been assigned to a given block pair.

    Call this repeatedly, in between successive runs of the
    :func:`mcmc_sweep` function, so that the counts build up over many
    sampled partitions.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`
        The block state.
    p : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Edge property map with vector-type values, holding the block
        membership counts collected so far.  Each vector entry corresponds to
        ``b[i] + B * b[j]``, where ``b`` is the block membership and
        ``i = min(source(e), target(e))`` and
        ``j = max(source(e), target(e))``. If not provided, a new, empty
        histogram is created.

    Returns
    -------
    p : :class:`~graph_tool.PropertyMap`
        Edge property map with vector-type values, storing the accumulated
        block membership counts. This is the very same map that was passed in
        as ``p``, when one was given.


    Examples
    --------
    .. testsetup:: collect_edge_marginals

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: collect_edge_marginals

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.BlockState(g, B=4, deg_corr=True)
       >>> pe = None
       >>> for i in range(1000):        # remove part of the transient
       ...     ds, nmoves = gt.mcmc_sweep(state)
       >>> for i in range(1000):
       ...     ds, nmoves = gt.mcmc_sweep(state)
       ...     pe = gt.collect_edge_marginals(state, pe)
       >>> gt.bethe_entropy(state, pe)[0]
       17.6097732625099...
    """

    g = state.g

    # Start from an empty histogram unless the caller hands one over.
    hist = g.new_edge_property("vector<int>") if p is None else p

    # The library routine updates ``hist`` in place from the current
    # partition ``state.b``.
    libcommunity.edge_marginals(g._Graph__graph,
                                state.bg._Graph__graph,
                                state.B,
                                _prop("v", g, state.b),
                                _prop("e", g, hist))
    return hist

def collect_vertex_marginals(state, p=None):
    r"""Accumulate the vertex marginal histogram, i.e. the number of times
    each node has been assigned to a given block.

    Call this repeatedly, in between successive runs of the
    :func:`mcmc_sweep` function, so that the counts build up over many
    sampled partitions.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`
        The block state.
    p : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
        Vertex property map with vector-type values, holding the block
        membership counts collected so far. If not provided, a new, empty
        histogram is created.

    Returns
    -------
    p : :class:`~graph_tool.PropertyMap`
        Vertex property map with vector-type values, storing the accumulated
        block membership counts. This is the very same map that was passed in
        as ``p``, when one was given.

    Examples
    --------
    .. testsetup:: cvm

       gt.seed_rng(42)
       np.random.seed(42)

    .. doctest:: cvm

       >>> g = gt.collection.data["polbooks"]
       >>> state = gt.BlockState(g, B=4, deg_corr=True)
       >>> pv = None
       >>> for i in range(1000):        # remove part of the transient
       ...     ds, nmoves = gt.mcmc_sweep(state)
       >>> for i in range(1000):
       ...     ds, nmoves = gt.mcmc_sweep(state)
       ...     pv = gt.collect_vertex_marginals(state, pv)
       >>> gt.mf_entropy(state, pv)
       20.117550557730116
       >>> gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft_B4.pdf")
       <...>

    .. testcleanup:: cvm

       gt.graph_draw(g, pos=g.vp["pos"], vertex_shape="pie", vertex_pie_fractions=pv, output="polbooks_blocks_soft_B4.png")

    .. figure:: polbooks_blocks_soft_B4.*
       :align: center

       "Soft" block partition of a political books network with :math:`B=4`.

    """

    g = state.g

    # Start from an empty histogram unless the caller hands one over.
    hist = g.new_vertex_property("vector<int>") if p is None else p

    # The library routine updates ``hist`` in place from the current
    # partition ``state.b``.
    libcommunity.vertex_marginals(g._Graph__graph,
                                  _prop("v", g, state.b),
                                  _prop("v", g, hist))
    return hist

def bethe_entropy(state, p):
    r"""Compute the Bethe entropy given the edge block membership marginals.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`
        The block state.
    p : :class:`~graph_tool.PropertyMap`
        Edge property map with vector-type values, storing the accumulated
        block membership counts (as returned by
        :func:`collect_edge_marginals`).  Each vector entry corresponds to
        ``b[i] + B * b[j]``, where ``b`` is the block membership and
        ``i = min(source(e), target(e))`` and
        ``j = max(source(e), target(e))``.

    Returns
    -------
    H : ``float``
        The Bethe entropy value (in `nats <http://en.wikipedia.org/wiki/Nat_%28information%29>`_)
    Hmf : ``float``
        The "mean field" entropy value (in `nats <http://en.wikipedia.org/wiki/Nat_%28information%29>`_),
        as would be returned by the :func:`mf_entropy` function.
    pv : :class:`~graph_tool.PropertyMap`
        Newly created vertex property map with vector-type values
        (``vector<double>``), storing the accumulated block membership
        counts. These are the node marginals, as would be returned by the
        :func:`collect_vertex_marginals` function.

    Notes
    -----

    The Bethe entropy is defined as,

    .. math::

        H = -\sum_{e,(r,s)}\pi_{(r,s)}^e\ln\pi_{(r,s)}^e - \sum_{v,r}(1-k_v)\pi_r^v\ln\pi_r^v,

    where :math:`\pi_{(r,s)}^e` is the marginal probability that the endpoints
    of the edge :math:`e` belong to blocks :math:`(r,s)`, and :math:`\pi_r^v` is
    the marginal probability that vertex :math:`v` belongs to block :math:`r`,
    and :math:`k_v` is the degree of vertex :math:`v` (or total degree for
    directed graphs).

    References
    ----------
    .. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information,
       Physics, and Computation", Oxford Univ Press, 2009.
    """
    # Output map for the node marginals; it is handed to the library routine
    # below and returned to the caller.
    pv = state.g.new_vertex_property("vector<double>")

    # The library call yields four values; only the two entropies are part of
    # this function's result, the remaining two are deliberately discarded.
    H, _, Hmf, _ = libcommunity.bethe_entropy(state.g._Graph__graph,
                                              state.B,
                                              _prop("e", state.g, p),
                                              _prop("v", state.g, pv))
    return H, Hmf, pv


def mf_entropy(state, p):
    r"""Evaluate the "mean field" entropy from the vertex block membership
    marginals.

    Parameters
    ----------
    state : :class:`~graph_tool.community.BlockState`
        The block state.
    p : :class:`~graph_tool.PropertyMap`
        Vertex property map with vector-type values, storing the accumulated
        block membership counts.

    Returns
    -------
    Hmf : ``float``
        The "mean field" entropy value (in `nats <http://en.wikipedia.org/wiki/Nat_%28information%29>`_).

    Notes
    -----

    The "mean field" entropy is defined as,

    .. math::

        H = - \sum_{v,r}\pi_r^v\ln\pi_r^v,

    where :math:`\pi_r^v` is the marginal probability that vertex :math:`v`
    belongs to block :math:`r`.

    Vertices whose histogram sums to zero do not contribute to the result.

    References
    ----------
    .. [mezard-information-2009] Marc Mézard, Andrea Montanari, "Information,
       Physics, and Computation", Oxford Univ Press, 2009.

    """
    entropy = 0
    for vertex in state.g.vertices():
        counts = p[vertex].a
        total = counts.sum()
        if total != 0:
            # Normalize the counts into probabilities and drop the empty
            # bins, so that log() is never evaluated at zero.
            probs = asarray(counts, dtype="float") / total
            support = probs[probs > 0]
            entropy -= (support * log(support)).sum()
    return entropy


from . overlap_blockmodel import *
from . covariate_blockmodel import *
