17.2.5 Supported Property Types for Supervised GraphWise Model
The model supports two types of properties for both vertices and edges:
- continuous properties (boolean, double, float, integer, long)
- categorical properties (string)
For categorical properties, two categorical configurations are possible:
- one-hot-encoding: Each category is mapped to a vector that is concatenated to other features (default)
- embedding table: Each category is mapped to an embedding that is concatenated to other features and is trained along with the model
opg4j> import oracle.pgx.config.mllib.inputconfig.CategoricalPropertyConfig;
opg4j> // Categorical config for "vertex_str_feature_1": one-hot encoding, max vocabulary size 100
opg4j> var oneHotConfig = analyst.categoricalPropertyConfigBuilder("vertex_str_feature_1").
         oneHotEncoding().
         setMaxVocabularySize(100).
         build()
opg4j> // Categorical config for "vertex_str_feature_2": embedding table trained along with the model.
opg4j> // setShared: whether to share the vocabulary when several vertex types have a property with the same name.
opg4j> // setOutOfVocabularyProbability: probability to set the word embedding to the out-of-vocabulary embedding.
opg4j> var embeddingConfig = analyst.categoricalPropertyConfigBuilder("vertex_str_feature_2").
         embeddingTable().
         setShared(false).
         setEmbeddingDimension(32).
         setOutOfVocabularyProbability(0.001).
         build()
opg4j> // Input features, in order: one continuous feature, then three string features.
opg4j> // "vertex_str_feature_3" has no explicit config and therefore uses one-hot-encoding (the default).
opg4j> var model = analyst.supervisedGraphWiseModelBuilder().
         setVertexInputPropertyNames(
           "vertex_int_feature_1",
           "vertex_str_feature_1",
           "vertex_str_feature_2",
           "vertex_str_feature_3"
         ).
         setVertexInputPropertyConfigs(oneHotConfig, embeddingConfig).
         setVertexTargetPropertyName("label").
         build()
import oracle.pgx.api.mllib.SupervisedGraphWiseModel;
import oracle.pgx.config.mllib.inputconfig.CategoricalPropertyConfig;
import oracle.pgx.config.mllib.inputconfig.InputPropertyConfig;

// Categorical config for "vertex_str_feature_1": one-hot encoding, max vocabulary size 100.
InputPropertyConfig prop1config = analyst.categoricalPropertyConfigBuilder("vertex_str_feature_1")
    .oneHotEncoding()
    .setMaxVocabularySize(100)
    .build();

// Categorical config for "vertex_str_feature_2": embedding table trained along with the model.
InputPropertyConfig prop2config = analyst.categoricalPropertyConfigBuilder("vertex_str_feature_2")
    .embeddingTable()
    .setShared(false) // set whether to share the vocabulary or not when several vertex types have a property with the same name
    .setEmbeddingDimension(32)
    .setOutOfVocabularyProbability(0.001) // probability to set the word embedding to the out-of-vocabulary embedding
    .build();

// build() returns the model itself, so the variable is typed as SupervisedGraphWiseModel
// (not SupervisedGraphWiseModelBuilder).
SupervisedGraphWiseModel model = analyst.supervisedGraphWiseModelBuilder()
    .setVertexInputPropertyNames(
        "vertex_int_feature_1", // continuous feature
        "vertex_str_feature_1", // string feature using one-hot-encoding
        "vertex_str_feature_2", // string feature using embedding table
        "vertex_str_feature_3"  // string feature using one-hot-encoding (default)
    )
    .setVertexInputPropertyConfigs(prop1config, prop2config)
    .setVertexTargetPropertyName("label")
    .build();
# Per-property categorical configurations. String properties that are not
# listed here use one-hot-encoding, which is the default.
vertex_input_property_configs = [
    # "vertex_str_feature_1": one-hot encoding, max vocabulary size 100
    analyst.one_hot_encoding_categorical_property_config(
        property_name="vertex_str_feature_1",
        max_vocabulary_size=100,
    ),
    # "vertex_str_feature_2": embedding table trained along with the model
    analyst.learned_embedding_categorical_property_config(
        property_name="vertex_str_feature_2",
        embedding_dim=4,
        shared=False,  # set whether to share the vocabulary or not when several types have a property with the same name
        oov_probability=0.001,  # probability to set the word embedding to the out-of-vocabulary embedding
    ),
]

model_params = dict(
    vertex_input_property_names=[
        "vertex_int_feature_1",  # continuous feature
        "vertex_str_feature_1",  # string feature using one-hot-encoding
        "vertex_str_feature_2",  # string feature using embedding table
        "vertex_str_feature_3",  # string feature using one-hot-encoding (default)
    ],
    vertex_input_property_configs=vertex_input_property_configs,
    vertex_target_property_name="label",
)

model = analyst.supervised_graphwise_builder(**model_params)