PGX 1.2.0
Documentation

Matrix Factorization (Gradient Descent)

Time Complexity O(E*k*max) [k = vector length, max = max steps]    Space Requirement O(N*k) [k = vector length]

Computes features based on relationships between vertices.

Signature

Input Argument Type Comment
G graph
is_left nodeProp<bool> the property used to determine which nodes are ones recommendations will be generated for
weight edgeProp<double> the edge property that determines the weight of a rating
learning_rate double
change_per_step double
lambda double
max_step int
vector_length int
Output Argument Type Comment
dest_property nodeProp<vect<double>[vector_length]> the property to generate features into
Return Value Type Comment
double The computed root mean square error

Green-Marl Code

/*
* Copyright (C) 2013 - 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Matrix factorization gradient descent - computes "features" based on relationships
* between nodes.
*
* Input Parameters:
*   graph:      the graph for which the MST will be computed.
*       is_left:        the property used to determine which nodes are ones recommendations will
*                       be generated for
*       weight:         the edge property that determines the weight of a rating
*       learning_rate:  
*       change_per_step:  
*       lambda:  
*       max_step:  
*       vector_length:  the number of features to compute
*
* Output Parameters:
*   dest_property:  the property to generate features into
*
* return Value:
*           the root mean square error
*
*/
procedure matrix_factorization_gradient_descent(G: graph, 
              is_left: nodeProp<bool>,
              weight: edgeProp<double>,
              learning_rate: double,
              change_per_step: double,
              lambda: double,
              max_step: int,
              vector_length: int;
              dest_property: nodeProp<vect<double>[vector_length]>): double {

    nodeProp<vect<double>[vector_length]> dest_property_next;
    for(n: G.nodes) {
        n.dest_property = uniform();
    }

    G.dest_property_next = _.dest_property;
    double VALUE_MAX = 5.0;
    double VALUE_MIN = 1.0;
    double rate = learning_rate;
    int nupdate = 0;
    int counter = 0;

    double root_mean_square_error = 0.0;
    while (counter < max_step) {

        foreach (curr_node: G.nodes) {
            if (curr_node.is_left) {
                vect<double>[vector_length] Z = 0.0;

                for(curr_edge: curr_node.outEdges) {
                    node opposite_node = curr_edge.toNode();

                    double weight_1 = curr_edge.weight;
                    double weight_2 = curr_node.dest_property * opposite_node.dest_property;

                    if (weight_2 > VALUE_MAX) {
                        weight_2 = VALUE_MAX;
                    } else if (weight_2 < VALUE_MIN) {
                        weight_2 = VALUE_MIN;
                    }

                    Z += ((weight_1 - weight_2) * opposite_node.dest_property - lambda * curr_node.dest_property);
                    root_mean_square_error += (weight_1 - weight_2) * (weight_1 - weight_2);
                }
                        curr_node.dest_property_next = curr_node.dest_property + rate * Z;
            } else {
                vect<double>[vector_length] Z = 0.0;

                for(curr_edge: curr_node.inEdges) {
                    node opposite_node = curr_edge.fromNode();

                    double weight_1 = curr_edge.weight;
                    double weight_2 = curr_node.dest_property * opposite_node.dest_property;

                    if (weight_2 > VALUE_MAX) {
                        weight_2 = VALUE_MAX;
                    } else if (weight_2 < VALUE_MIN) {
                        weight_2 = VALUE_MIN;
                    }

                    Z += (weight_1 - weight_2) * opposite_node.dest_property - lambda * curr_node.dest_property;
                    root_mean_square_error += (weight_1 - weight_2) * (weight_1 - weight_2);
                }               
                curr_node.dest_property_next = curr_node.dest_property + rate * Z;
            } 
        }
        G.dest_property = _.dest_property_next;
        root_mean_square_error = sqrt(root_mean_square_error / (G.numEdges() * 2));
        rate *= change_per_step;
        counter++;
    }
    return root_mean_square_error;
}

Compute estimated ratings

The following helper program can be used to compute the estimated ratings for a specific vertex based on the matrix factorization results:

/*
* Copyright (C) 2013 - 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Estimate ratings for a node, using "features" computed in an earlier call to
* matrix_factorization_gradient_descent.
*
* Input Parameters:
*   graph:      the graph for which the MST will be computed.
*       user:           the vertex to generate ratings for
*       is_left:        the property used to determine which nodes are ones recommendations will
*                       be generated for
*       vector_length:  the number of features
*       feature:        the property containing the features
*
* Output Parameters:
*   estimated_rating:  the property to generate ratings into
*
* return Value:
*           none
*
*/
procedure compute_estimated_rating (
    G: graph, 
    user: node,
    is_left: nodeProp<bool>,
    vector_length: int,
    feature: nodeProp<vect<double>[vector_length]>;
    estimated_rating: nodeProp<double> ) {

    foreach(n: G.nodes) {
        if ( n.is_left ) {
            n.estimated_rating = 0;
        } else if ( n.hasEdgeFrom (user) ) {
             // already seen the movie
            n.estimated_rating = 0;
        } else {
            // inner product of two feature vectors
            n.estimated_rating = user.feature * n.feature;
        }
    }
}