Stochastic Approach for Link-Structure Analysis (SALSA) Algorithms
Assigns a numerical weight to each vertex based on the edges connecting them.
Classic SALSA
The idea of hubs and authorites comes from the web pages: a hub is regarded as a page that is not authoritative in a specific matter, but it has instead links to authority pages, which are regarded as meaningful sources for a particular topic by many hubs. Thus a good hub will point to many authorities, while a good authority will be pointed by many hubs. SALSA is an algorithm that computes authorities and hubs ranking scores for the vertices using the network created by the edges of the bipartite graph and assigning weights to the contributions of their 2nd-degree neighbors. This way of computing the scores creates the independence between the authority and hub scores, which are assigned to the vertices depending on the side of the graph they belong (left:hub / right:aut).
Signature
Input Argument |
Type |
Comment |
G |
graph |
|
is_left |
vertexProp |
boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths). |
tol |
double |
maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value. |
max_iter |
int |
maximum number of iterations that will be performed. |
Output Argument |
Type |
Comment |
rank |
vertexProp |
vertex property holding the normalized authority/hub ranking score for each vertex. |
Code
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
package oracle.pgx.algorithms;
import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.annotations.Out;
import static java.lang.Math.abs;
@GraphAlgorithm
public class Salsa {
public void salsa(PgxGraph g, VertexProperty<Boolean> isLeft, double tol, int maxIter,
@Out VertexProperty<Double> rank) {
long numVertices = g.getNumVertices();
if (numVertices == 0) {
return;
}
VertexProperty<Double> deg = VertexProperty.create();
Scalar<Double> diff = Scalar.create();
int cnt = 0;
deg.setAll(v -> (double) (isLeft.get(v) ? v.getOutDegree() : v.getInDegree()));
long numHubs = g.getVertices().filter(isLeft).size();
long numAuths = numVertices - numHubs;
rank.setAll(v -> 1.0 / (isLeft.get(v) ? numHubs : numAuths));
do {
diff.set(0.0);
g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
Scalar<Double> val = Scalar.create(0.0);
if (isLeft.get(n)) {
n.getOutNeighbors()
.forEach(v -> val.reduceAdd(v.getInNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
} else {
n.getInNeighbors()
.forEach(v -> val.reduceAdd(v.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
}
diff.reduceAdd(abs(val.get() - rank.get(n)));
rank.setDeferred(n, val.get());
});
cnt++;
} while (diff.get() > tol && cnt < maxIter);
}
}
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
procedure salsa(graph G, vertexProp<bool> is_left, double tol, int max_iter; vertexProp<double> rank) {
long N = G.numNodes();
if (N == 0) {
return;
}
vertexProp<double> deg;
double diff;
int cnt = 0;
int num_hubs = 0;
G.deg = _.is_left ? _.outDegree() : _.inDegree();
num_hubs = count (n: G.nodes) (n.is_left);
int num_auths = (int) N - num_hubs;
G.rank = 1.0 / (_.is_left ? num_hubs : num_auths);
do {
diff = 0.0;
foreach (n: G.nodes) (n.numNbrs() + n.numInNbrs() > 0) {
double val = 0.0;
if (n.is_left) {
foreach (v: n.outNbrs) {
val += sum (w: v.inNbrs) {w.rank / (v.deg * w.deg)};
}
} else {
foreach (v: n.inNbrs) {
val += sum (w: v.outNbrs) {w.rank / (v.deg * w.deg)};
}
}
diff += |val - n.rank|;
n.rank <= val;
}
cnt++;
} while (diff > tol && cnt < max_iter);
}
Personalized SALSA
This Personalized version of SALSA allows to select a particular vertex or set of vertices from the given graph in order to give them a greater importance when computing the ranking scores, which will have as result a personalized SALSA score and show relevant (or similar) vertices to the ones chosen for the personalization.
Signature
Input Argument |
Type |
Comment |
G |
graph |
|
h |
node |
the chosen vertex from the graph for personalization. |
is_left |
vertexProp |
boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths). |
damp |
double |
damping factor to modulate the degree of personalization of the scores by the algorithm. |
tol |
double |
maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value. |
max_iter |
int |
maximum number of iterations that will be performed. |
Output Argument |
Type |
Comment |
rank |
vertexProp |
vertex property holding the normalized authority/hub ranking score for each vertex. |
Code
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
package oracle.pgx.algorithms;
import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.PgxVertex;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.annotations.Out;
import static java.lang.Math.abs;
@GraphAlgorithm
public class SalsaPersonalized {
public void personalizedSalsa(PgxGraph g, PgxVertex v, VertexProperty<Boolean> isLeft, double damp, double tol,
int maxIter, @Out VertexProperty<Double> rank) {
long numVertices = g.getNumVertices();
if (numVertices == 0) {
return;
}
VertexProperty<Double> deg = VertexProperty.create();
Scalar<Double> diff = Scalar.create();
int cnt = 0;
deg.setAll(w -> (double) (isLeft.get(w) ? w.getOutDegree() : w.getInDegree()));
long numHubs = g.getVertices().filter(isLeft).size();
long numAuths = numVertices - numHubs;
if (isLeft.get(v)) {
rank.setAll(w -> isLeft.get(w) ? 0.0 : 1.0 / numAuths);
} else {
rank.setAll(w -> isLeft.get(w) ? 1.0 / numHubs : 0.0);
}
rank.set(v, 1.0);
do {
diff.set(0.0);
g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
Scalar<Double> val = Scalar.create(0.0);
if (isLeft.get(n)) {
n.getOutNeighbors()
.forEach(x -> val.reduceAdd(x.getInNeighbors().sum(w -> rank.get(w) / (deg.get(x) * deg.get(w)))));
if (isLeft.get(v)) {
val.set((n == v ? damp : 0.0) + (1.0 - damp) * val.get());
}
} else {
n.getInNeighbors()
.forEach(x -> val.reduceAdd(x.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(x) * deg.get(w)))));
if (!isLeft.get(v)) {
val.set((n == v ? damp : 0.0) + (1.0 - damp) * val.get());
}
}
diff.reduceAdd(abs(val.get() - rank.get(n)));
rank.setDeferred(n, val.get());
});
cnt++;
} while (diff.get() > tol && cnt < maxIter);
}
}
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
procedure personalized_salsa (graph G,node v, vertexProp<bool> is_left, double damp,
double tol, int max_iter; vertexProp<double> rank) {
long N = G.numNodes();
if (N == 0) {
return;
}
vertexProp<double> deg;
double diff;
int cnt = 0;
int num_hubs = 0;
G.deg = _.is_left ? _.outDegree() : _.inDegree();
num_hubs = count (n: G.nodes) (n.is_left);
int num_auths = (int) (N - num_hubs);
if (v.is_left) {
G.rank = _.is_left ? 0.0 : 1.0 / num_auths;
} else {
G.rank = _.is_left ? 1.0 / num_hubs : 0.0;
}
v.rank = 1.0;
do {
diff = 0.0;
foreach (n: G.nodes) (n.numNbrs() + n.numInNbrs() > 0) {
double val = 0.0;
if (n.is_left) {
foreach (x: n.outNbrs) {
val += sum (w: x.inNbrs) {w.rank / (x.deg * w.deg)};
}
if (v.is_left) {
val = (n == v ? damp : 0.0) + (1.0 - damp) * val;
}
} else {
foreach (x: n.inNbrs) {
val += sum (w: x.outNbrs) {w.rank / (x.deg * w.deg)};
}
if (!v.is_left) {
val = (n == v ? damp : 0.0) + (1.0 - damp) * val;
}
}
diff += |val - n.rank|;
n.rank <= val;
}
cnt++;
} while (diff > tol && cnt < max_iter);
}
Personalized SALSA (for a set of vertices)
This Personalized version of SALSA allows to select a particular vertex or set of vertices from the given graph in order to give them a greater importance when computing the ranking scores, which will have as result a personalized SALSA score and show relevant (or similar) vertices to the ones chosen for the personalization.
Signature
Input Argument |
Type |
Comment |
G |
graph |
|
source |
nodeSet |
the set of chosen vertices from the graph for personalization. |
is_left |
vertexProp |
boolean vertex property stating the side of the vertices in the bipartite graph (left for hubs, right for auths). |
damp |
double |
damping factor to modulate the degree of personalization of the scores by the algorithm. |
tol |
double |
maximum tolerated error value. The algorithm will stop once the sum of the error values of all vertices becomes smaller than this value. |
max_iter |
int |
maximum number of iterations that will be performed. |
Output Argument |
Type |
Comment |
rank |
vertexProp |
vertex property holding the normalized authority/hub ranking score for each vertex. |
Code
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
package oracle.pgx.algorithms;
import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.VertexSet;
import oracle.pgx.algorithm.annotations.Out;
import static java.lang.Math.abs;
@GraphAlgorithm
public class SalsaPersonalizedFromSet {
public void personalizedSalsaFromSet(PgxGraph g, VertexSet source, VertexProperty<Boolean> isLeft, double damp,
double tol, int maxIter, @Out VertexProperty<Double> rank) {
long numVertices = g.getNumVertices();
long m = source.size();
if (m == 0 || numVertices == 0) {
return;
}
VertexProperty<Double> deg = VertexProperty.create();
VertexProperty<Boolean> isStart = VertexProperty.create();
Scalar<Double> diff = Scalar.create();
int cnt = 0;
deg.setAll(w -> (double) (isLeft.get(w) ? w.getOutDegree() : w.getInDegree()));
long numHubs = g.getVertices().filter(isLeft).size();
long numAuths = numVertices - numHubs;
long sHub = source.filter(isLeft).size();
long sAut = source.filter(n -> !isLeft.get(n)).size();
if (sHub > 0 && sAut > 0) { // mixed personalization
rank.setAll(0.0);
} else if (sHub > 0 && sAut == 0) { // personalization for hubs
rank.setAll(v -> isLeft.get(v) ? 0.0 : 1.0 / numAuths);
} else if (sHub == 0 && sAut > 0) { // personalization for auths
rank.setAll(v -> isLeft.get(v) ? 1.0 / numHubs : 0.0);
}
isStart.setAll(false);
source.forEach(n -> {
if (isLeft.get(n) && sHub > 0) {
rank.set(n, 1.0 / sHub);
} else if (!isLeft.get(n) && sAut > 0) {
rank.set(n, 1.0 / sAut);
}
isStart.set(n, true);
});
do {
diff.set(0.0);
g.getVertices().filter(n -> n.getOutDegree() + n.getInDegree() > 0).forEach(n -> {
Scalar<Double> val = Scalar.create(0.0);
if (isLeft.get(n)) {
n.getOutNeighbors()
.forEach(v -> val.reduceAdd(v.getInNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
if (sHub > 0) {
val.set((isStart.get(n) ? damp : 0) + (1.0 - damp) * val.get());
}
} else {
n.getInNeighbors()
.forEach(v -> val.reduceAdd(v.getOutNeighbors().sum(w -> rank.get(w) / (deg.get(v) * deg.get(w)))));
if (sAut > 0) {
val.set((isStart.get(n) ? damp : 0) + (1.0 - damp) * val.get());
}
}
diff.reduceAdd(abs(val.get() - rank.get(n)));
rank.setDeferred(n, val.get());
});
cnt++;
} while (diff.get() > tol && cnt < maxIter);
g.getVertices().forEach(n -> {
if (isLeft.get(n) && sHub > 0) {
rank.set(n, rank.get(n) / sHub);
} else if (!isLeft.get(n) && sAut > 0) {
rank.set(n, rank.get(n) / sAut);
}
});
}
}
/*
* Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
*/
procedure personalized_salsa_from_set(graph G, nodeSet source, vertexProp<bool> is_left,
double damp, double tol, int max_iter; vertexProp<double> rank) {
long N = G.numNodes();
if (source.size() == 0 || N == 0) {
return;
}
vertexProp<double> deg;
vertexProp<bool> is_start;
double diff;
int cnt = 0;
int num_hubs = 0;
G.deg = _.is_left ? _.outDegree() : _.inDegree();
num_hubs = count(n:G.nodes)(n.is_left);
int num_auths = (int) N - num_hubs;
int s_hub = count(n: source.items) (n.is_left);
int s_aut = count(n: source.items) (!n.is_left);
if (s_hub > 0 && s_aut > 0) { //mixed personalization
G.rank = 0.0;
} else if (s_hub > 0 && s_aut == 0) { //personalization for hubs
G.rank = _.is_left ? 0.0 : 1.0/num_auths;
} else if (s_hub == 0 && s_aut > 0) { //personalization for auths
G.rank = _.is_left ? 1.0/num_hubs : 0.0;
}
G.is_start = false;
foreach (n: source.items) {
if (n.is_left && s_hub > 0) {
n.rank = 1.0/s_hub;
}
else if (!n.is_left && s_aut > 0) {
n.rank = 1.0/s_aut;
}
n.is_start = true;
}
do {
diff = 0.0;
foreach (n: G.nodes) (n.numNbrs()+n.numInNbrs() > 0) {
double val = 0.0;
if (n.is_left) {
foreach (v: n.outNbrs) {
val += sum (w: v.inNbrs) {w.rank / (v.deg * w.deg)};
}
if (s_hub > 0) {
val = (n.is_start ? damp : 0) + (1.0 - damp) * val;
}
} else {
foreach (v: n.inNbrs){
val += sum (w: v.outNbrs) {w.rank / (v.deg * w.deg)};
}
if (s_aut > 0) {
val = (n.is_start ? damp : 0) + (1.0 - damp) * val;
}
}
diff += |val - n.rank|;
n.rank <= val;
}
cnt++;
} while (diff > tol && cnt < max_iter);
foreach (n: G.nodes) {
if (n.is_left && s_hub > 0) {
n.rank = n.rank / s_hub;
} else if (!n.is_left && s_aut > 0) {
n.rank = n.rank / s_aut;
}
}
}