PGX 20.1.1
Documentation

Stochastic Approach for Link-Structure Analysis (SALSA) Algorithms

Assigns a numerical weight to each vertex based on the edges connecting them.


Classic SALSA

Category ranking and walking

Algorithm ID pgx_builtin_r1b_salsa

Time Complexity O(E * k) with E = number of edges, k <= maximum number of iterations

Space Requirement O(2 * V) with V = number of vertices

Javadoc


The idea of hubs and authorites comes from the web pages: a hub is regarded as a page that is not authoritative in a specific matter, but it has instead links to authority pages, which are regarded as meaningful sources for a particular topic by many hubs. Thus a good hub will point to many authorities, while a good authority will be pointed by many hubs. SALSA is an algorithm that computes authorities and hubs ranking scores for the vertices using the network created by the edges of the bipartite graph and assigning weights to the contributions of their 2nd-degree neighbors. This way of computing the scores creates the independence between the authority and hub scores, which are assigned to the vertices depending on the side of the graph they belong (left:hub / right:aut).

Signature

Input Argument Type Comment
G graph
is_left nodeProp boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths).
tol double maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value.
max_iter int maximum number of iterations that will be performed.
Output Argument Type Comment
rank nodeProp vertex property holding the normalized authority/hub ranking score for each vertex.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */
package oracle.pgx.algorithms;

import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.annotations.Out;

import static java.lang.Math.abs;

@GraphAlgorithm
public class Salsa {
  public void salsa(PgxGraph g, VertexProperty<Boolean> isLeft, double tol, int maxIter,
      @Out VertexProperty<Double> rank) {
    long numVertices = g.getNumVertices();

    if (numVertices == 0) {
      return;
    }

    VertexProperty<Double> deg = VertexProperty.create();
    Scalar<Double> diff = Scalar.create();
    int cnt = 0;

    deg.setAll(v -> (double) (isLeft.get(v) ? v.getOutDegree() : v.getInDegree()));
    long numHubs = g.getVertices().filter(isLeft).size();
    long numAuths = numVertices - numHubs;

    rank.setAll(v -> 1.0 / (isLeft.get(v) ? numHubs : numAuths));

    do {
      diff.set(0.0);
      g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
        Scalar<Double> val = Scalar.create(0.0);
        if (isLeft.get(n)) {
          n.getOutNeighbors()
              .forEach(v -> val.reduceAdd(v.getInNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
        } else {
          n.getInNeighbors()
              .forEach(v -> val.reduceAdd(v.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
        }
        diff.reduceAdd(abs(val.get() - rank.get(n)));
        rank.setDeferred(n, val.get());
      });
      cnt++;
    } while (diff.get() > tol && cnt < maxIter);
  }
}
/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

procedure salsa(graph G, nodeProp<bool> is_left, double tol, int max_iter; nodeProp<double> rank) {

  long N = G.numNodes();

  if (N == 0) {
    return;
  }

  nodeProp<double> deg;
  double diff;
  int cnt = 0;
  int num_hubs = 0;

  G.deg = _.is_left ? _.outDegree() : _.inDegree();
  num_hubs = count (n: G.nodes) (n.is_left);
  int num_auths = (int) N - num_hubs;

  G.rank = 1.0 / (_.is_left ? num_hubs : num_auths);

  do {

    diff = 0.0;
    foreach (n: G.nodes) (n.numNbrs() + n.numInNbrs() > 0) {
      double val = 0.0;
      if (n.is_left) {
        foreach (v: n.outNbrs) {
          val += sum (w: v.inNbrs) {w.rank / (v.deg * w.deg)};
        }
      } else {
        foreach (v: n.inNbrs) {
          val += sum (w: v.outNbrs) {w.rank / (v.deg * w.deg)};
        }
      }
      diff += |val - n.rank|;
      n.rank <= val;
    }
    cnt++;
  } while (diff > tol && cnt < max_iter);
}

Personalized SALSA

Category ranking and walking

Algorithm ID pgx_builtin_r2_personalized_salsa

Time Complexity O(E * k) with E = number of edges, k <= maximum number of iterations

Space Requirement O(2 * V) with V = number of vertices

Javadoc


This Personalized version of SALSA allows to select a particular vertex or set of vertices from the given graph in order to give them a greater importance when computing the ranking scores, which will have as result a personalized SALSA score and show relevant (or similar) vertices to the ones chosen for the personalization.

Signature

Input Argument Type Comment
G graph
h node the chosen vertex from the graph for personalization.
is_left nodeProp boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths).
damp double damping factor to modulate the degree of personalization of the scores by the algorithm.
tol double maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value.
max_iter int maximum number of iterations that will be performed.
Output Argument Type Comment
rank nodeProp vertex property holding the normalized authority/hub ranking score for each vertex.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */
package oracle.pgx.algorithms;

import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.PgxVertex;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.annotations.Out;

import static java.lang.Math.abs;

@GraphAlgorithm
public class SalsaPersonalized {
  public void personalizedSalsa(PgxGraph g, PgxVertex v, VertexProperty<Boolean> isLeft, double damp, double tol,
      int maxIter, @Out VertexProperty<Double> rank) {
    long numVertices = g.getNumVertices();

    if (numVertices == 0) {
      return;
    }

    VertexProperty<Double> deg = VertexProperty.create();
    Scalar<Double> diff = Scalar.create();
    int cnt = 0;

    deg.setAll(w -> (double) (isLeft.get(w) ? w.getOutDegree() : w.getInDegree()));
    long numHubs = g.getVertices().filter(isLeft).size();
    long numAuths = numVertices - numHubs;

    if (isLeft.get(v)) {
      rank.setAll(w -> isLeft.get(w) ? 0.0 : 1.0 / numAuths);
    } else {
      rank.setAll(w -> isLeft.get(w) ? 1.0 / numHubs : 0.0);
    }

    rank.set(v, 1.0);

    do {
      diff.set(0.0);
      g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
        Scalar<Double> val = Scalar.create(0.0);
        if (isLeft.get(n)) {
          n.getOutNeighbors()
              .forEach(x -> val.reduceAdd(x.getInNeighbors().sum(w -> rank.get(w) / (deg.get(x) * deg.get(w)))));
          if (isLeft.get(v)) {
            val.set((n == v ? damp : 0.0) + (1.0 - damp) * val.get());
          }
        } else {
          n.getInNeighbors()
              .forEach(x -> val.reduceAdd(x.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(x) * deg.get(w)))));
          if (!isLeft.get(v)) {
            val.set((n == v ? damp : 0.0) + (1.0 - damp) * val.get());
          }
        }
        diff.reduceAdd(abs(val.get() - rank.get(n)));
        rank.setDeferred(n, val.get());
      });
      cnt++;
    } while (diff.get() > tol && cnt < maxIter);
  }
}
/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

procedure personalized_salsa (graph G,node v, nodeProp<bool> is_left, double damp,
    double tol, int max_iter; nodeProp<double> rank) {

  long N = G.numNodes();

  if (N == 0) {
    return;
  }

  nodeProp<double> deg;
  double diff;
  int cnt = 0;
  int num_hubs = 0;

  G.deg = _.is_left ? _.outDegree() : _.inDegree();
  num_hubs = count (n: G.nodes) (n.is_left);
  int num_auths = (int) (N - num_hubs);

  if (v.is_left) {
    G.rank = _.is_left ? 0.0 : 1.0 / num_auths;
  } else {
    G.rank = _.is_left ? 1.0 / num_hubs : 0.0;
  }
  v.rank = 1.0;

  do {

    diff = 0.0;
    foreach (n: G.nodes) (n.numNbrs() + n.numInNbrs() > 0) {
      double val = 0.0;
      if (n.is_left) {
        foreach (x: n.outNbrs) {
          val += sum (w: x.inNbrs) {w.rank / (x.deg * w.deg)};
        }
        if (v.is_left) {
          val = (n == v ? damp : 0.0) + (1.0 - damp) * val;
        }
      } else {
        foreach (x: n.inNbrs) {
          val += sum (w: x.outNbrs) {w.rank / (x.deg * w.deg)};
        }
        if (!v.is_left) {
          val = (n == v ? damp : 0.0) + (1.0 - damp) * val;
        }
      }
      diff += |val - n.rank|;
      n.rank <= val;
    }
    cnt++;
  } while (diff > tol && cnt < max_iter);
}

Personalized SALSA (for a set of vertices)

Category ranking and walking

Algorithm ID pgx_builtin_r3_personalized_salsa_from_set

Time Complexity O(E * k) with E = number of edges, k <= maximum number of iterations

Space Requirement O(3 * V) with V = number of vertices

Javadoc


This Personalized version of SALSA allows to select a particular vertex or set of vertices from the given graph in order to give them a greater importance when computing the ranking scores, which will have as result a personalized SALSA score and show relevant (or similar) vertices to the ones chosen for the personalization.

Signature

Input Argument Type Comment
G graph
source nodeSet the set of chosen vertices from the graph for personalization.
is_left nodeProp boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths).
damp double damping factor to modulate the degree of personalization of the scores by the algorithm.
tol double maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value.
max_iter int maximum number of iterations that will be performed.
Output Argument Type Comment
rank nodeProp vertex property holding the normalized authority/hub ranking score for each vertex.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */
package oracle.pgx.algorithms;

import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.VertexSet;
import oracle.pgx.algorithm.annotations.Out;

import static java.lang.Math.abs;

@GraphAlgorithm
public class SalsaPersonalizedFromSet {
  public void personalizedSalsaFromSet(PgxGraph g, VertexSet source, VertexProperty<Boolean> isLeft, double damp,
      double tol, int maxIter, @Out VertexProperty<Double> rank) {
    long numVertices = g.getNumVertices();
    long m = source.size();

    if (m == 0 || numVertices == 0) {
      return;
    }

    VertexProperty<Double> deg = VertexProperty.create();
    VertexProperty<Boolean> isStart = VertexProperty.create();

    Scalar<Double> diff = Scalar.create();
    int cnt = 0;

    deg.setAll(w -> (double) (isLeft.get(w) ? w.getOutDegree() : w.getInDegree()));
    long numHubs = g.getVertices().filter(isLeft).size();
    long numAuths = numVertices - numHubs;

    long sHub = source.filter(isLeft).size();
    long sAut = source.filter(n -> !isLeft.get(n)).size();

    if (sHub > 0 && sAut > 0) { // mixed personalization
      rank.setAll(0.0);
    } else if (sHub > 0 && sAut == 0) { // personalization for hubs
      rank.setAll(v -> isLeft.get(v) ? 0.0 : 1.0 / numAuths);
    } else if (sHub == 0 && sAut > 0) { // personalization for auths
      rank.setAll(v -> isLeft.get(v) ? 1.0 / numHubs : 0.0);
    }

    isStart.setAll(false);

    source.forEach(n -> {
      if (isLeft.get(n) && sHub > 0) {
        rank.set(n, 1.0 / sHub);
      } else if (!isLeft.get(n) && sAut > 0) {
        rank.set(n, 1.0 / sAut);
      }
      isStart.set(n, true);
    });

    do {
      diff.set(0.0);
      g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
        Scalar<Double> val = Scalar.create(0.0);
        if (isLeft.get(n)) {
          n.getOutNeighbors()
              .forEach(v -> val.reduceAdd(v.getInNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
          if (sHub > 0) {
            val.set((isStart.get(n) ? damp : 0) + (1.0 - damp) * val.get());
          }
        } else {
          n.getInNeighbors()
              .forEach(v -> val.reduceAdd(v.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
          if (sAut > 0) {
            val.set((isStart.get(n) ? damp : 0) + (1.0 - damp) * val.get());
          }
        }
        diff.reduceAdd(abs(val.get() - rank.get(n)));
        rank.setDeferred(n, val.get());
      });
      cnt++;
    } while (diff.get() > tol && cnt < maxIter);

    g.getVertices().forEach(n -> {
      if (isLeft.get(n) && sHub > 0) {
        rank.set(n, rank.get(n) / sHub);
      } else if (!isLeft.get(n) && sAut > 0) {
        rank.set(n, rank.get(n) / sAut);
      }
    });
  }
}
/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

procedure personalized_salsa_from_set(graph G, nodeSet source, nodeProp<bool> is_left,
    double damp, double tol, int max_iter; nodeProp<double> rank) {

  long N = G.numNodes();
  if (source.size() == 0 || N == 0) {
    return;
  }

  nodeProp<double> deg;
  nodeProp<bool> is_start;

  double diff;
  int cnt = 0;
  int num_hubs = 0;

  G.deg = _.is_left ? _.outDegree() : _.inDegree();
  num_hubs = count(n:G.nodes)(n.is_left);
  int num_auths = (int) N - num_hubs;

  int s_hub = count(n: source.items) (n.is_left);
  int s_aut = count(n: source.items) (!n.is_left);

  if (s_hub > 0 && s_aut > 0) { //mixed personalization
    G.rank = 0.0;
  } else if (s_hub > 0 && s_aut == 0) { //personalization for hubs
    G.rank = _.is_left ? 0.0 : 1.0/num_auths;
  } else if (s_hub == 0 && s_aut > 0) { //personalization for auths
    G.rank = _.is_left ? 1.0/num_hubs : 0.0;
  }

  G.is_start = false;

  foreach (n: source.items) {
    if (n.is_left && s_hub > 0) {
      n.rank = 1.0/s_hub;
    }
    else if (!n.is_left && s_aut > 0) {
      n.rank = 1.0/s_aut;
    }
    n.is_start = true;
  }

  do {

    diff = 0.0;
    foreach (n: G.nodes) (n.numNbrs()+n.numInNbrs() > 0) {
      double val = 0.0;
      if (n.is_left) {
        foreach (v: n.outNbrs) {
          val += sum (w: v.inNbrs) {w.rank / (v.deg * w.deg)};
        }
        if (s_hub > 0) {
          val = (n.is_start ? damp : 0) + (1.0 - damp) * val;
        }
      } else {
        foreach (v: n.inNbrs){
          val += sum (w: v.outNbrs) {w.rank / (v.deg * w.deg)};
        }
        if (s_aut > 0) {
          val = (n.is_start ? damp : 0) + (1.0 - damp) * val;
        }
      }
      diff += |val - n.rank|;
      n.rank <= val;
    }
    cnt++;
  } while (diff > tol && cnt < max_iter);

  foreach (n: G.nodes) {
    if (n.is_left && s_hub > 0) {
      n.rank = n.rank / s_hub;
    } else if (!n.is_left && s_aut > 0) {
      n.rank = n.rank / s_aut;
    }
  }
}