PGX 20.1.1
Documentation

WTF (Whom To Follow) Algorithm

Category link prediction

Algorithm ID pgx_builtin_l1_whom_to_follow

Time Complexity O(E * (p + s)) with E = number of edges, p <= maximum number of iterations for the Pagerank step, s <= maximum number of iterations for the SALSA step

Space Requirement O(5 * V) with V = number of vertices

Javadoc


The Whom To Follow algorithm is composed of two main stages: the first stage finds the relevant vertices (users) for a given source vertex (a particular user), which in this implementation is done with personalized Pagerank for the given source vertex. The second stage analyzes the relationships between the relevant vertices previously found, through the edges linking them with their neighbors. This second stage relies on the SALSA algorithm and assigns a ranking score to all the hub and authority vertices, so that the recommendations can be derived from these assigned values. Whom To Follow takes the concept of authority and hub vertices and adapts it to users in social networks: the hub vertices become users similar to the given source vertex (also a user), while the authority vertices are translated into users that might be of interest to the source vertex, i.e. users to follow.

Signature

Input Argument Type Comment
G graph
source node the chosen vertex from the graph for personalization of the recommendations.
top_k int the maximum number of recommendations that will be returned. This number should be smaller than the size of the circle of trust.
circle_of_trust int the maximum size of the circle of trust.
max_iter int maximum number of iterations that will be performed for the Pagerank stage.
tol double maximum tolerated error value for the Pagerank stage. The stage will stop once the sum of the error values of all vertices becomes smaller than this value.
damping_factor double damping factor for the Pagerank stage.
salsa_max_iter int maximum number of iterations that will be performed for the SALSA stage.
salsa_tol double maximum tolerated error value for the SALSA stage. The stage will stop once the sum of the error values of all vertices becomes smaller than this value.
Output Argument Type Comment
hubs nodeSeq vertex sequence holding the top rated hub vertices (similar users) for the recommendations.
auths nodeSeq vertex sequence holding the top rated authority vertices (users to follow) for the recommendations.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */
package oracle.pgx.algorithms;

import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.PgxMap;
import oracle.pgx.algorithm.PgxVertex;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.VertexSequence;
import oracle.pgx.algorithm.VertexSet;
import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.annotations.Out;

import static java.lang.Math.abs;

@GraphAlgorithm
public class Wtf {
  /**
   * Whom To Follow (WTF) recommendation algorithm.
   *
   * <p>Two stages: (1) personalized Pagerank seeded at {@code source} scores every vertex and a
   * "circle of trust" of up to {@code circleOfTrust} vertices is selected; (2) a SALSA iteration
   * over the bipartite graph formed by the circle (hub side) and the circle's neighbors
   * (authority side) produces the final ranking. Top-rated hubs are users similar to
   * {@code source}; top-rated authorities are users {@code source} may want to follow.
   *
   * @param g             the input graph
   * @param source        the vertex the recommendations are personalized for
   * @param rawTopK       maximum number of recommendations per output sequence; clamped to the
   *                      circle-of-trust size
   * @param circleOfTrust maximum size of the circle of trust; clamped to |V| - 1
   * @param maxIter       iteration cap for the Pagerank stage
   * @param tol           convergence tolerance (summed per-vertex change) for the Pagerank stage
   * @param dampingFactor damping factor for the Pagerank stage
   * @param salsaMaxIter  iteration cap for the SALSA stage
   * @param salsaTol      convergence tolerance (summed per-vertex change) for the SALSA stage
   * @param hubs          (out) top-rated hub vertices (similar users), best first
   * @param auths         (out) top-rated authority vertices (users to follow), best first
   */
  public void whomToFollow(PgxGraph g, PgxVertex source, int rawTopK, int circleOfTrust, int maxIter, double tol,
      double dampingFactor, int salsaMaxIter, double salsaTol, @Out VertexSequence hubs, @Out VertexSequence auths) {
    // adjusting sizes of circle of trust and topK values if needed
    long circle = circleOfTrust > g.getNumVertices() ? g.getNumVertices() - 1 : circleOfTrust;
    long topK = rawTopK > circle ? circle : rawTopK;

    // computing personalized pagerank for getting relevant vertices
    VertexProperty<Double> rank = VertexProperty.create();
    personalizedPagerank(g, source, tol, dampingFactor, maxIter, rank);

    // creating circle of trust with top ranked vertices
    // isLeft encoding: 0 = not part of the bipartite graph, 1 = hub side (circle of trust),
    // 2 = authority side (eligible neighbor of a hub).
    VertexSet circleSet = VertexSet.create();
    VertexSequence copyCircleSet = VertexSequence.create();
    VertexProperty<Integer> isLeft = VertexProperty.create(0);
    Scalar<Integer> k = Scalar.create(0);

    // NOTE(review): despite the comment above, this selects the FIRST `circle` non-source
    // vertices in iteration order, not the `circle` highest-ranked ones; the heap below is only
    // used to order the already-selected vertices by rank — confirm this is intended.
    PgxMap<PgxVertex, Double> heap = PgxMap.create();
    g.getVertices().filter(n -> k.get() < circle && n != source).forSequential(n -> {
      isLeft.set(n, 1);
      circleSet.add(n);
      heap.set(n, rank.get(n));
      k.increment();
    });
    // Drain the heap in descending-rank order, pushing each extracted vertex to the front, so
    // copyCircleSet ends up sorted by ascending rank from front to back.
    while (heap.size() > 0) {
      PgxVertex n = heap.getKeyForMaxValue();
      heap.remove(n);
      copyCircleSet.pushFront(n);
    }

    // creating bipartite graph with vertices from the circle of trust and their neighbors
    // Drop hubs with no eligible authority neighbor (all neighbors are other hubs or the
    // source): they cannot contribute to the SALSA stage.
    copyCircleSet.forSequential(n -> {
      long localAuth = n.getNeighbors().filter(nbr -> isLeft.get(nbr) != 1 && nbr != source).size();

      if (localAuth == 0) {
        circleSet.remove(n);
        isLeft.set(n, 0);
      }
    });

    VertexProperty<Double> deg = VertexProperty.create();
    Scalar<Integer> numHubs = Scalar.create(0);
    Scalar<Integer> numAuths = Scalar.create(0);

    // Mark authorities (isLeft = 2) and record bipartite degrees: for an authority, its number
    // of hub in-neighbors; for a hub, its number of eligible authority neighbors.
    circleSet.forEach(n -> {
      Scalar<Double> localAuth = Scalar.create(0d);
      n.getNeighbors().filter(nbr -> isLeft.get(nbr) != 1 && nbr != source).forEach(nbr -> {
        if (isLeft.get(nbr) == 0) {
          // First time this neighbor is seen: count it and promote it to the authority side.
          numAuths.increment();
          isLeft.set(nbr, 2);
          deg.set(nbr, (double) nbr.getInNeighbors().filter(nbr2 -> isLeft.get(nbr2) == 1).size());
        }
        localAuth.increment();
      });
      deg.set(n, localAuth.get());
      numHubs.increment();
    });

    // salsa step for recommendations
    salsa(g, salsaTol, salsaMaxIter, numHubs.get(), numAuths.get(), isLeft, deg, rank);

    // getting topK hubs and auths
    getTopNodes(g, topK, numHubs.get(), numAuths.get(), rank, isLeft, hubs, auths);
  }

  /**
   * Personalized Pagerank seeded at {@code v}.
   *
   * <p>All rank mass starts on {@code v}, and the {@code (1 - dampingFactor)} teleport term is
   * credited only to {@code v}. Rank held by dangling vertices (out-degree 0) is redistributed
   * uniformly over all vertices each iteration. Stops when the summed per-vertex change falls
   * to {@code tol} or below, or after {@code maxIter} iterations.
   *
   * @param rank (out) resulting per-vertex Pagerank values
   */
  void personalizedPagerank(PgxGraph g, PgxVertex v, double tol, double dampingFactor, int maxIter,
      @Out VertexProperty<Double> rank) {
    double numVertices = g.getNumVertices();
    Scalar<Double> diff = Scalar.create(0.0);
    int cnt = 0;
    rank.setAll(0d);
    rank.set(v, 1.0);

    do {
      diff.set(0.0);
      // Total rank sitting on dangling vertices, spread uniformly across the graph.
      double danglingFactor =
          dampingFactor / numVertices * g.getVertices().filter(n -> n.getOutDegree() == 0).sum(rank);

      g.getVertices().forEach(t -> {
        // Teleport mass goes exclusively to the personalization vertex.
        double val1 = (t == v) ? (1 - dampingFactor) : 0;
        double val2 = dampingFactor * t.getInNeighbors().sum(w -> rank.get(w) / w.getOutDegree());
        double val = val1 + val2 + danglingFactor;
        diff.reduceAdd(abs(val - rank.get(t)));
        // Deferred write: new ranks are applied in bulk so this iteration reads old values only.
        rank.setDeferred(t, val);
      });
      cnt++;
    } while (diff.get() > tol && cnt < maxIter);
  }

  /**
   * SALSA iteration restricted to the bipartite subgraph marked by {@code isLeft}
   * (1 = hub side, 2 = authority side, 0 = excluded).
   *
   * <p>Ranks start uniform on each side (1/numHubs and 1/numAuths). Each iteration first
   * computes, per active vertex, the degree-weighted sum of its bipartite neighbors' ranks
   * divided by its own degree, then propagates those values one more hop — together a standard
   * two-hop SALSA update per side. Stops when the summed change falls to {@code salsaTol} or
   * below, or after {@code salsaMaxIter} iterations.
   *
   * @param rank (in/out) overwritten with SALSA scores for vertices with {@code isLeft != 0}
   */
  void salsa(PgxGraph g, double salsaTol, int salsaMaxIter, int numHubs, int numAuths, VertexProperty<Integer> isLeft,
      VertexProperty<Double> deg, @Out VertexProperty<Double> rank) {
    Scalar<Double> diff = Scalar.create(0.0);
    VertexProperty<Double> filteredWeightedNeighborRanks = VertexProperty.create();
    int cnt = 0;

    // Uniform initialization per side of the bipartite graph.
    g.getVertices().filter(n -> isLeft.get(n) != 0).forEach(n -> {
      if (isLeft.get(n) == 1) {
        rank.set(n, 1.0 / numHubs);
      } else {
        rank.set(n, 1.0 / numAuths);
      }
    });

    do {
      diff.set(0.0);
      // Phase 1: per-vertex weighted neighbor contribution (rank/deg summed over the opposite
      // side, then normalized by this vertex's own bipartite degree).
      g.getVertices().forEach(u -> {
        Scalar<Double> val = Scalar.create(0.0);
        if (isLeft.get(u) == 2) {
          val.reduceAdd(u.getInNeighbors().filter(w -> isLeft.get(w) == 1).sum(w -> rank.get(w) / deg.get(w)));
          val.set(val.get() / deg.get(u));
        } else if (isLeft.get(u) == 1) {
          val.reduceAdd(u.getOutNeighbors().filter(w -> isLeft.get(w) == 2).sum(w -> rank.get(w) / deg.get(w)));
          val.set(val.get() / deg.get(u));
        }
        filteredWeightedNeighborRanks.setDeferred(u, val.get());
      });

      // Phase 2: aggregate phase-1 values over bipartite neighbors to complete the two-hop step.
      g.getVertices().filter(n -> isLeft.get(n) != 0).forEach(n -> {
        Scalar<Double> val = Scalar.create(0.0);
        if (isLeft.get(n) == 1) {
          n.getOutNeighbors().filter(u -> isLeft.get(u) == 2).forEach(u ->
              val.reduceAdd(filteredWeightedNeighborRanks.get(u)));
        } else {
          n.getInNeighbors().filter(u -> isLeft.get(u) == 1).forEach(u ->
              val.reduceAdd(filteredWeightedNeighborRanks.get(u)));
        }

        diff.reduceAdd(abs(val.get() - rank.get(n)));
        rank.setDeferred(n, val.get());
      });
      cnt++;
    } while (diff.get() > salsaTol && cnt < salsaMaxIter);
  }

  /**
   * Splits the active vertices into two rank-ordered heaps (hubs where {@code isLeft == 1},
   * authorities where {@code isLeft == 2}) and copies the best {@code topK} of each into the
   * output sequences, highest rank first.
   */
  void getTopNodes(PgxGraph g, long topK, int numHubs, int numAuths, VertexProperty<Double> rank,
      VertexProperty<Integer> isLeft, @Out VertexSequence hubs, @Out VertexSequence auths) {

    PgxMap<PgxVertex, Double> hubsHeap = PgxMap.create();
    PgxMap<PgxVertex, Double> authsHeap = PgxMap.create();
    g.getVertices().filter(n -> isLeft.get(n) == 1 || isLeft.get(n) == 2).forSequential(n -> {
      if (isLeft.get(n) == 1) {
        hubsHeap.set(n, rank.get(n));
      } else {
        authsHeap.set(n, rank.get(n));
      }
    });

    fillOutputFromHeap(g, topK, numHubs, hubsHeap, hubs);
    fillOutputFromHeap(g, topK, numAuths, authsHeap, auths);
  }

  /**
   * Moves up to {@code min(topK, maxCount)} entries from {@code heap} into {@code list},
   * extracting the maximum-valued key each time, so the list is ordered best first.
   */
  void fillOutputFromHeap(PgxGraph g, long topK, int maxCount, PgxMap<PgxVertex, Double> heap,
      @Out VertexSequence list) {

    Scalar<Long> k = Scalar.create(0L);
    while (k.get() < maxCount && k.get() < topK) {
      PgxVertex n = heap.getKeyForMaxValue();
      heap.remove(n);
      list.pushBack(n);
      k.increment();
      // NOTE(review): redundant — the loop guard (k < maxCount) already terminates here.
      if (k.get() == maxCount) {
        return;
      }
    }
  }
}
/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

// Whom To Follow: recommends hubs (similar users) and authorities (users to follow) for
// `source`. Stage 1: personalized Pagerank scores the graph; stage 2: SALSA over the bipartite
// graph formed by the circle of trust (hub side) and its neighbors (authority side).
// is_left encoding: 0 = excluded, 1 = hub side (circle of trust), 2 = authority side.
procedure whom_to_follow(graph G, node source, int top_k, int circle_of_trust, int max_iter, double tol,
    double damping_factor, int salsa_max_iter, double salsa_tol; nodeSeq hubs, nodeSeq auths) {

  // adjusting sizes of circle of trust and top_k values if needed
  int circle = circle_of_trust > G.numNodes() ? (int) (G.numNodes() - 1L) : circle_of_trust;
  int topK = top_k > circle ? circle : top_k;

  // computing personalized pagerank for getting relevant vertices
  nodeProp<double> rank;
  personalized_pagerank(G, source, tol, damping_factor, max_iter, rank);

  // creating circle of trust with top ranked vertices
  nodeSet circle_set;
  nodeSeq copy_circle_set;
  nodeProp<int> is_left;
  G.is_left = 0;
  int k = 0;

  // NOTE(review): despite the comment above, this takes the FIRST `circle` non-source nodes
  // in iteration order, not the `circle` highest-ranked ones; the heap below only orders the
  // already-selected nodes by rank — confirm this is intended.
  map<node, double> heap;
  for (n: G.nodes) (k < circle && n != source) {
    n.is_left = 1;
    circle_set.add(n);
    heap[n] = n.rank;
    k++;
  }
  // Drain the heap in descending-rank order; pushing each max to the front leaves
  // copy_circle_set sorted by ascending rank from front to back.
  while (heap.size() > 0) {
    node n = heap.getMaxKey();
    heap.remove(n);
    copy_circle_set.pushFront(n);
  }

  // creating bipartite graph with vertices from the circle of trust and their neighbors
  // Drop hubs with no eligible authority neighbor: they cannot contribute to SALSA.
  for (n: copy_circle_set.items) {
    int local_auth = count (nbr: n.nbrs) (nbr.is_left != 1 && nbr != source);
    if (local_auth == 0) {
      circle_set.remove(n);
      n.is_left = 0;
    }
  }

  // Mark authorities (is_left = 2) and record bipartite degrees: for an authority, its number
  // of hub in-neighbors; for a hub, its number of eligible authority neighbors.
  nodeProp<double> deg;
  int num_hubs = 0;
  int num_auths = 0;
  foreach (n: circle_set.items) {
    int local_auth = 0;
    foreach (nbr: n.nbrs) (nbr.is_left != 1 && nbr != source) {
      if (nbr.is_left == 0) {
        // First time this neighbor is seen: count it and promote it to the authority side.
        num_auths++;
        nbr.is_left = 2;
        nbr.deg = count (nbr2: nbr.inNbrs) (nbr2.is_left == 1);
      }
      local_auth++;
    }
    n.deg = local_auth;
    num_hubs++;
  }

  // salsa step for recommendations
  salsa(G, salsa_tol, salsa_max_iter, num_hubs, num_auths, is_left, deg, rank);

  // getting topK hubs and auths
  get_top_nodes(G, topK, num_hubs, num_auths, rank, is_left, hubs, auths);
}

// Personalized Pagerank seeded at v: all rank mass starts on v and the (1 - damping_factor)
// teleport term is credited only to v. Rank on dangling nodes (out-degree 0) is redistributed
// uniformly each iteration. Stops when the summed per-node change reaches tol or below, or
// after max_iter iterations. Output: the `rank` node property.
local personalized_pagerank(graph G, node v, double tol, double damping_factor, int max_iter;
    nodeProp<double> rank) {

  double N = G.numNodes();
  double diff = 0.0;
  int cnt = 0;
  G.rank = 0;
  v.rank = 1.0;

  do {
    diff = 0.0;
    // Total rank on dangling nodes, spread uniformly across the graph.
    double dangling_factor = damping_factor / N * sum (n: G.nodes) (n.outDegree() == 0) {n.rank};

    foreach (t: G.nodes) {
      // Teleport mass goes exclusively to the personalization node.
      double val1 = (t == v) ? (1 - damping_factor) : 0;
      double val2 = damping_factor * sum (w: t.inNbrs) {w.rank / w.outDegree()} ;
      double val = val1 + val2 + dangling_factor;
      diff += |val - t.rank|;
      t.rank <= val; // deferred write: this iteration reads old ranks only
    }
    cnt++;
  } while (diff > tol && cnt < max_iter);
}

// SALSA iteration restricted to the bipartite subgraph marked by is_left (1 = hub side,
// 2 = authority side, 0 = excluded). Ranks start uniform on each side; each iteration is a
// two-hop update: phase 1 computes each node's degree-weighted neighbor contribution, phase 2
// aggregates those values over bipartite neighbors. Stops when the summed change reaches
// salsa_tol or below, or after salsa_max_iter iterations. Overwrites `rank` for active nodes.
local salsa(graph G, double salsa_tol, int salsa_max_iter, int num_hubs, int num_auths,
    nodeProp<int> is_left, nodeProp<double> deg; nodeProp<double> rank) {

  nodeProp<double> filteredWeightedNeighborRanks;
  double diff = 0.0;
  int cnt = 0;

  // Uniform initialization per side of the bipartite graph.
  foreach (n: G.nodes) (n.is_left != 0) {
    if (n.is_left == 1) {
      n.rank = 1.0 / num_hubs;
    } else {
      n.rank = 1.0 / num_auths;
    }
  }

  do {
    diff = 0.0;
    // Phase 1: rank/deg summed over the opposite side, normalized by this node's own degree.
    foreach (u: G.nodes) {
      double val = 0.0;
      if (u.is_left == 2) {
          val = sum (w: u.inNbrs) (w.is_left == 1) {w.rank / w.deg} / u.deg;
      }
      if (u.is_left == 1) {
          val = sum (w: u.outNbrs) (w.is_left == 2) {w.rank / w.deg} / u.deg;
      }
      u.filteredWeightedNeighborRanks = val;
    }
    // Phase 2: aggregate phase-1 values over bipartite neighbors to complete the two-hop step.
    foreach (n: G.nodes) (n.is_left != 0) {
      double val = 0.0;
      if (n.is_left == 1) {
        foreach (u: n.outNbrs) (u.is_left == 2) {
          val += u.filteredWeightedNeighborRanks;
        }
      } else {
        foreach (u: n.inNbrs) (u.is_left == 1) {
          val += u.filteredWeightedNeighborRanks;
        }
      }
      diff += |val - n.rank|;
      n.rank <= val; // deferred write: this iteration reads old ranks only
    }
    cnt++;
  } while (diff > salsa_tol && cnt < salsa_max_iter);
}

// Splits the active nodes into two rank-ordered heaps (hubs where is_left == 1, authorities
// where is_left == 2) and copies the best topK of each into the output sequences, best first.
local get_top_nodes(graph G, int topK, int num_hubs, int num_auths, nodeProp<double> rank,
    nodeProp<int> is_left; nodeSeq hubs, nodeSeq auths) {

  map<node, double> hubs_heap;
  map<node, double> auths_heap;
  for (n: G.nodes) (n.is_left == 1 || n.is_left == 2) {
    if (n.is_left == 1) {
      hubs_heap[n] = n.rank;
    } else {
      auths_heap[n] = n.rank;
    }
  }
  fill_output_from_heap(G, topK, num_hubs, hubs_heap, hubs);
  fill_output_from_heap(G, topK, num_auths, auths_heap, auths);
}

// Moves up to min(topK, max_count) entries from `heap` into `list`, extracting the
// maximum-valued key each time, so the list is ordered best first.
local fill_output_from_heap(graph G, int topK, int max_count, map<node, double> heap; nodeSeq list) {
  int k = 0;
  while (k < max_count && k < topK) {
    node n = heap.getMaxKey();
    heap.remove(n);
    list.push(n);
    k++;
    // NOTE(review): redundant — the loop guard (k < max_count) already terminates here.
    if (k == max_count) {
      return;
    }
  }
}