PGX 20.2.2
Documentation

Hyperlink-Induced Topic Search (HITS)

Category ranking and walking

Algorithm ID pgx_builtin_k5_hits

Time Complexity O(E * k) with E = number of edges, k <= maximum number of iterations

Space Requirement O(2 * V) with V = number of vertices

Javadoc


HITS is an algorithm that computes two ranking scores (authority and hub)for each vertex in the graph. The idea of hubs and authorites comes from the web pages: a hub is regarded as a page that is not authoritative in a specific topic, but it has instead links to authority pages, which are regarded as meaningful sources for a particular topic by many hubs. Thus a good hub will point to many authorities, while a good authority will be pointed by many hubs. The authority score of a vertex V is computed by adding all the hub scores of its incomming neighbors (i.e. vertices with edges pointing to V). The hub score is computed in a similar way, using the authority scores instead.

Signature

Input Argument Type Comment
G graph
max_iter int number of iterations that will be performed.
Output Argument Type Comment
auth vertexProp vertex property holding the authority score for each vertex.
hub vertexProp vertex property holding the hub score for each vertex.

Code

/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */
package oracle.pgx.algorithms;

import oracle.pgx.algorithm.annotations.GraphAlgorithm;
import oracle.pgx.algorithm.PgxGraph;
import oracle.pgx.algorithm.Scalar;
import oracle.pgx.algorithm.VertexProperty;
import oracle.pgx.algorithm.annotations.Out;

import static java.lang.Math.sqrt;

@GraphAlgorithm
public class Hits {
  public void hits(PgxGraph g, int maxIter, @Out VertexProperty<Double> auth, @Out VertexProperty<Double> hub) {
    auth.setAll(1.0);
    hub.setAll(1.0);

    int k = 0;
    while (k < maxIter) {
      Scalar<Double> norm = Scalar.create();

      // phase 1. update auth from hub
      norm.set(0d);

      g.getVertices().forEach(p -> {
        double v = p.getInNeighbors().sum(hub);
        norm.reduceAdd(v * v);
        auth.set(p, v);
      });
      norm.set(sqrt(norm.get()));
      auth.setAll(v -> auth.get(v) / norm.get());

      // phase 2. hub from auth
      norm.set(0d);

      g.getVertices().forEach(p -> {
        double v = p.getOutNeighbors().sum(auth);
        norm.reduceAdd(v * v);
        hub.set(p, v);
      });
      norm.set(sqrt(norm.get()));
      hub.setAll(v -> hub.get(v) / norm.get());

      k++;
    }
  }
}
/*
 * Copyright (C) 2013 - 2020 Oracle and/or its affiliates. All rights reserved.
 */

procedure hits(graph G, int max_iter; vertexProp<double> auth, vertexProp<double> hub) {

  G.auth = 1.0;
  G.hub = 1.0;

  int k = 0;
  while (k < max_iter) {
    double norm;

    // phase 1. update auth from hub
    norm = 0;
    foreach (p: G.nodes) {
      double v = sum (q: p.inNbrs) {q.hub};
      norm += v * v;
      p.auth = v;
    }
    norm = sqrt(norm);
    // Normalize
    G.auth = _.auth / norm;

    // phase 2. hub from auth
    norm = 0;
    foreach (p: G.nodes) {
      double v = sum (q: p.outNbrs) {q.auth};
      norm += v * v;
      p.hub = v;
    }
    norm = sqrt(norm);
    // Normalize
    G.hub = _.hub / norm;
    k++;
  }

}