PGX 20.1.1
Documentation

Louvain

Category community detection

Algorithm ID pgx_builtin_c4_louvain

Time Complexity O(E * k) with E = number of edges, k <= maximum number of iterations

Space Requirement O(8 * V + E) with V = number of vertices

Javadoc


Louvain is an algorithm for community detection in large graphs which uses the graph's modularity. Initially it assigns a different community to each node of the graph. It then iterates over the nodes and evaluates for each node the modularity gain obtained by removing the node from its community and placing it in the community of one of its neigbours. The node is placed in the community for which the modularity gain is maximum. This process is repeated for all nodes until no improvement is possible, i.e until no new assignement of a node to a different community can improve the graph's modularity.

Signature

Input Argument Type Comment
G graph
weight edgeProp weights of the edges of the graph.
max_iter int maximum number of iterations that will be performed during each pass.
nbr_pass int number of passes that will be performed.
tol double maximum tolerated error value. The algorithm will stop once the graph's total modularity gain becomes smaller than this value.
Output Argument Type Comment
communityId vertexProp the community ID assigned to each node.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

procedure louvain(graph G, edgeProp<double> weight, int max_iter, int nbr_pass, double tol;
    vertexProp<long> communityId) {

  long c = 0;
  int iter = 0;
  int pass = 0;
  long num_nodes = G.numNodes();
  map<long, double> sumIn;
  map<long, double> sumTotal;
  vertexProp<double> edgeWeight;

  // initialize communities
  for(n: G.nodes) {
    n.communityId = c;

    //sum of the weights of the edges incident to node n
    n.edgeWeight = sum(e: n.edges) { e.weight };

    //sum of the weights of the edges inside n's community
    sumIn[c] = sum(e: n.edges) (e.toNode() == n) { e.weight };

    //sum of the weights of the edges incident to nodes in n's community
    sumTotal[c] = n.edgeWeight;

    c++;
  }

  double twoM = 2 * sum(e: G.edges) { e.weight };

  double new_mod = 0;
  double cur_mod = 0;

  if (nbr_pass > 1) {
    new_mod = modularity(G, weight, edgeWeight, communityId, twoM);
  }

  bool changed;

  do {
    cur_mod = new_mod;

    //aggregate the graph: nodes of the same community constitute a super node
    map<long, nodeSet> snodes;
    map<long, nodeSet> super_nbrs;
    map<long, double> all_super_edges;
    vertexProp<long> snode;
    map<long, long> snode_community;
    map<long, double> edgeWeightSum;
    for(n: G.nodes) {
      snodes[n.communityId].add(n);
      snode_community[n.communityId] = n.communityId;
      n.snode = n.communityId;
      for (n_nbr: n.nbrs) {
        edge e = n_nbr.edge();
        long idx = (num_nodes * n.communityId) + n_nbr.communityId;
        if (!all_super_edges.hasKey(idx)) {
          super_nbrs[n.communityId].add(n_nbr);
        }
        all_super_edges[idx] += e.weight;
        edgeWeightSum[n.communityId] += e.weight;
      }
    }

    do {
      changed = false;
      for(n: snodes.keys) {
        c = snode_community[n];
        double kIn = 0;
        double gain = 0;
        nodeSet snbrs = super_nbrs[n];
        double max_gain = 0;
        double modularity_gain;
        for(o: snbrs.items) {
          long comm = snode_community[o.snode];
          for(m: snbrs.items) (snode_community[m.snode] == comm) {
            kIn += all_super_edges[(num_nodes * n) + m.snode];
          }

          modularity_gain = (sumIn[comm] + kIn) / twoM - pow((sumTotal[comm] + edgeWeightSum[n]) / twoM, 2) - (sumIn[comm] / twoM - pow(sumTotal[comm] / twoM, 2) - pow(edgeWeightSum[n] / twoM, 2));
          if (modularity_gain > max_gain) {
            max_gain = modularity_gain;
            snode_community[n] = snode_community[o.snode];
          }
        }
        if (snode_community[n] != c) {
          double kInOld = 0;
          double kInNew = 0;
          changed = true;
          for(m: snbrs.items) (snode_community[m.snode] == c) {
            kInOld += all_super_edges[(num_nodes * n) + m.snode];
          }
          sumIn[c] = sumIn[c] - kInOld;
          sumTotal[c] = sumTotal[c] - (edgeWeightSum[n] - kInOld);
          for(m: snbrs.items) (snode_community[m.snode] == snode_community[n]) {
            kInNew += all_super_edges[(num_nodes * n) + m.snode];
          }
          sumIn[snode_community[n]] = sumIn[snode_community[n]] + kInNew;
          sumTotal[snode_community[n]] = sumTotal[snode_community[n]] + (edgeWeightSum[n] - kInNew);
        }
      }
      iter++;
    } while (changed && iter < max_iter);
    foreach (n: G.nodes) {
      n.communityId = snode_community[n.snode];
    }
    pass++;
    if (nbr_pass > 1) {
      new_mod = modularity(G, weight, edgeWeight, communityId, twoM);
    }
  } while (pass < nbr_pass && (new_mod - cur_mod > tol));
}

local modularity(graph G, edgeProp<double> weight, vertexProp<double> edgeWeight, vertexProp<long> communityId,
  double twoM) : double {
  double Q = 0;
  foreach(i: G.nodes) {
    foreach(j: G.nodes) (i.communityId == j.communityId) {
      double aij = sum(e: j.edges) (e.toNode() == i) { e.weight };
      Q += aij - (i.edgeWeight * j.edgeWeight / twoM);
    }
  }
  return Q / twoM;
}