Oracle Database 12.2以降、OAAgraphパッケージは、Oracle R Enterpriseおよびデータベース表とともに使用するためにOracle Spatial and Graph Property Graph In-Memory Analyst (PGX)にRインタフェースを提供します。
OAAgraphについて
PGXは、Oracle Databaseのファンクション、プロシージャ、データ型およびデータ・モデルを統合した製品であり、空間分析およびグラフ分析をサポートしています。OAAgraphパッケージには、複数のグラフ・アルゴリズム、グラフ変換操作およびグラフ問合せ機能が含まれます。
RのOAAgraph関数を使用すると、効率的なPGXグラフ・アルゴリズムおよび表現を使用して、データベースのメモリー内でグラフ・メトリックの計算および分析を実行できます。結果のdata.frameオブジェクトを使用して、予測子としてグラフ・メトリックを含むモデルを構築できます。モデルを使用して、データをスコアリングまたは分類できます。次に、その結果をグラフ・ノードに追加し、グラフ・アルゴリズムを使用してグラフをさらに探索したり、新しいメトリックを計算したりできます。
OAAgraphパッケージの利点は次のとおりです。
高速なパラレル・グラフ分析を提供するPGXインメモリー・グラフ分析エンジンへのRのアクセス
多くのグラフ・アルゴリズム
グラフを問い合せてパターン一致を実行する機能
Oracle Big Data Spatial and GraphおよびOracle R Advanced Analytics for Hadoopとの統合
グラフ分析は、グラフとしてデータを表すデータ分析の方法です。データ・エンティティはノードになり、関係はエッジになります。グラフを介してファイン・グレイン関係を分析し、結合を繰り返し計算する必要なく迅速にマルチホップ関係をナビゲートできます。
グラフ・アルゴリズムの主な2つのタイプは次のとおりです。
グラフ全体を分析する計算グラフ分析
関係パターンに適合するサブグラフを検索する問合せであるグラフ・パターン一致
OAAgraphアルゴリズム
OAAgraphパッケージのアルゴリズムは次のとおりです。
例3-77 OAAgraph関数の使用
この例では、OAAgraphパッケージのグラフ機能を使用します。この例では、次の操作を実行しています。
ノードおよびエッジ表からグラフを作成します
データベースに格納されているスナップショット・インメモリー表現からグラフを作成します
グラフ分析アルゴリズムcountTriangles、degree、pagerankおよびadamicAdarCountingを呼び出します
oaa.cursorオブジェクトを使用します
インメモリー・グラフおよびデータベース・オブジェクトをクリーンアップします
#-- OAAgraph example script.
#-- Builds a small graph from node and edge tables, runs several graph
#-- algorithms, reads the results through cursors, round-trips the graph
#-- through a database snapshot, and finally cleans up everything it created.
#-- One statement per line: a '#--' comment runs to the end of its line, so
#-- statements must never share a line with a preceding comment.

library(ORE)
library(OAAgraph)

#-- Replace the values in quotation marks with the values for your database
dbHost <- "<DATABASE_HOST>"
dbUser <- "<DATABASE_USERNAME>"
dbPassword <- "<DATABASE_PASSWORD>"
dbSid <- "<DATABASE_SID>"
pgxBaseUrl <- "<PGX_BASE_URL>"

#-- Connect to the Oracle R Enterprise and PGX servers
ore.connect(host = dbHost, user = dbUser, password = dbPassword, sid = dbSid)
oaa.graphConnect(pgxBaseUrl = pgxBaseUrl, dbHost = dbHost,
                 dbSid = dbSid, dbUser = dbUser, dbPassword = dbPassword)

#-- Create the node table in Oracle Database
VID <- c(1, 2, 3, 4, 5)
NP1 <- c("node1", "node2", "node3", "node4", "node5")
NP2 <- c(111.11, 222.22, 333.33, 444.44, 555.55)
NP3 <- c(1, 2, 3, 4, 5)

nodes <- data.frame(VID, NP1, NP2, NP3)

#-- Drop a same-named table first so that the create starts from scratch
ore.drop(table = "MY_NODES")
ore.create(nodes, table = "MY_NODES")

#-- Create the edge table in Oracle Database
EID <- c(1, 2, 3, 4, 5)
SVID <- c(1, 3, 3, 2, 4)
DVID <- c(2, 1, 4, 3, 2)
EP1 <- c("edge1", "edge2", "edge3", "edge4", "edge5")
EL <- c("label1", "label2", "label3", "label4", "label5")

edges <- data.frame(EID, SVID, DVID, EP1, EL)

#-- Drop a same-named table first so that the create starts from scratch
ore.drop(table = "MY_EDGES")
ore.create(edges, table = "MY_EDGES")

#-- Verify that the tables exist as ore.frame objects
ore.ls()

#-- Create a graph in PGX from the node and edge tables in the database
graph <- oaa.graph(MY_EDGES, MY_NODES, "myPgxGraph")
names(graph, "nodes")
names(graph, "edges")

#-- See the result of the countTriangles function, which gives an
#-- overview of the number of connections between nodes in neighborhoods
countTriangles(graph, sortVerticesByDegree = FALSE)

#-- See the results from degree algorithm variants; note the graph nodes
#-- are augmented with new properties as indicated by the 'name' argument
degree(graph, name = "OutDegree")
degree(graph, name = "InDegree", variant = "in")
degree(graph, name = "InOutDegree", variant = "all")

#-- Create a cursor including the degree properties
cursor <- oaa.cursor(graph, c("OutDegree", "InOutDegree", "InDegree"), "nodes")

#-- View the first 5 entries from the cursor
oaa.next(cursor, 5)

#-- Create a cursor over the degree properties using
#-- the PGX SQL-like query language PGQL
cursor <- oaa.cursor(
  graph,
  query = "select n.OutDegree, n.InOutDegree, n.InDegree where (n) order by n.OutDegree desc"
)

#-- View the first 5 entries from the cursor
oaa.next(cursor, 5)

#-- See results from the pagerank algorithm
pagerankCursor <- pagerank(graph, 0.085, 0.1, 100)
oaa.next(pagerankCursor, 5)

#-- Create a cursor over the pagerank property using PGQL
cursor <- oaa.cursor(
  graph,
  query = "select n.pagerank where (n) order by n.pagerank desc"
)
oaa.next(cursor, 5)

#-- You can create a cursor using the R interface as well
cursor <- oaa.cursor(graph, "pagerank", ordering = "desc")
oaa.next(cursor, 5)

#-- Compute the adamic adar index for edges
topEdges <- adamicAdarCounting(graph)
oaa.next(topEdges)

#-- List any graph snapshots available
oaa.graphSnapshotList()

#-- Export a binary snapshot of the whole graph into Oracle Database
#-- and view the listing again
oaa.graphSnapshotPersist(graph, nodeProperties = TRUE, edgeProperties = TRUE)
oaa.graphSnapshotList()

#-- Read the snapshot back into memory
graph2 <- oaa.graphSnapshot("myPgxGraph")

#-- Export the graph nodes and their node properties from memory
#-- into a database table
oaa.create(graph2, nodeTableName = "RANKED_NODES", nodeProperties = TRUE)

#-- Export both nodes and edges as tables from memory into the database,
#-- but only export the NP1 and pagerank node properties
oaa.create(graph2, nodeTableName = "RANKED_GRAPH_N",
           nodeProperties = c("NP1", "pagerank"),
           edgeTableName = "RANKED_GRAPH_E")

#-- Export the graph edges and their properties from memory into a
#-- database table
oaa.create(graph2, edgeTableName = "RANKED_EDGES", edgeProperties = TRUE)

#-- Free the graphs at the PGX server
oaa.rm(graph)
oaa.rm(graph2)

#-- Clean up the tables created by this example
ore.drop("MY_NODES")
ore.drop("MY_EDGES")
ore.drop("RANKED_NODES")
ore.drop("RANKED_GRAPH_N")
ore.drop("RANKED_GRAPH_E")
ore.drop("RANKED_EDGES")

#-- Remove the persisted snapshot of the example graph
oaa.dropSnapshots("myPgxGraph")
この例のリスト
R> library(ORE) R> library(OAAgraph) R> R> #-- Replace the values in quotation marks with the values for your database R> dbHost <- "<DATABASE_HOST>" R> dbUser <- "<DATABASE_USERNAME>" R> dbPassword <- "<DATABASE_PASSWORD>" R> dbSid <- "<DATABASE_SID>" R> pgxBaseUrl <- "<PGX_BASE_URL>" R> R> #-- Connect to the Oracle R Enterprise and PGX servers R> ore.connect(host = dbHost, user = dbUser, password = dbPassword, sid = dbSid) R> oaa.graphConnect(pgxBaseUrl = pgxBaseUrl, dbHost = dbHost, + dbSid = dbSid, dbUser = dbUser, dbPassword = dbPassword) R> R> #-- Create the node table in Oracle Database R> R> VID <- c(1, 2, 3, 4, 5) R> NP1 <- c("node1", "node2", "node3", "node4", "node5") R> NP2 <- c(111.11, 222.22, 333.33, 444.44, 555.55) R> NP3 <- c(1, 2, 3, 4, 5) R> R> nodes <- data.frame(VID, NP1, NP2, NP3) R> ore.drop(table = "MY_NODES") R> ore.create(nodes, table = "MY_NODES") R> R> #-- Create the edge table in Oracle Database R> R> EID <- c(1, 2, 3, 4, 5) R> SVID <- c(1, 3, 3, 2, 4) R> DVID <- c(2, 1, 4, 3, 2) R> EP1 <- c("edge1", "edge2", "edge3", "edge4", "edge5") R> EL <- c("label1", "label2", "label3", "label4", "label5") R> R> edges <- data.frame(EID, SVID, DVID, EP1, EL) R> R> ore.drop(table = "MY_EDGES") R> ore.create(edges, table = "MY_EDGES") R> R> #-- Verify that the tables exist as ore.frame objects R> R> ore.ls() [1] "ASSIGN_EDGES_SUBSET" "ASSIGN_NODES_SUBSET" "CALL_EDGES" [4] "DF_EDGES_140317215226" "DF_EDGES_150317002703" "DF_NODES_140317215226" [7] "DF_NODES_150317002703" "EDGES" "EDGES_KEY" [10] "EDGES_T" "MY_EDGES" "MY_NODES" [13] "MY_NODES1" "N_H5855" "NODES" [16] "NODES2_T" "NODES30174128" "NODES30174506" [19] "NODES30174740" "NODES_KEY" "NODES_T" [22] "nyc20m" "PERSON_NODES" "PERSON_PAGERANK_NODES" [25] "SCCE" "SCCN" "SUPERHERO_DATA" [28] "SUPERHERO_DATA2" "SUPERHERO_EDGES" "SUPERHERO_IGNORE" [31] "SUPERHERO_INFORMATION" "SUPERHERO_NODES" "SUPERHERO_VALUES" [34] "TABLE1" "TABLE2" "TEMP_EDGES" [37] "TEMP_NODES" "TMPN" R> R> #-- Create a graph in PGX 
from the node and edge tables in the database R> R> graph <- oaa.graph(MY_EDGES, MY_NODES, "myPgxGraph") R> names(graph, "nodes") [1] "NP1" "NP3" "NP2" R> names(graph, "edges") [1] "EP1" R> R> #-- See the result of the countTriangles function, which gives an R> #-- overview of the number of connections between nodes in neighborhoods R> R> countTriangles(graph, sortVerticesByDegree=FALSE) [1] 2 R> R> #-- See the results from degree algorithm variants; note the graph nodes R> #-- are augmented with new properties as indicated by the 'name' argument R> R> degree(graph, name = "OutDegree") oaa.cursor over: ID, OutDegree position: 0 size: 5 R> degree(graph, name = "InDegree", variant = "in") oaa.cursor over: ID, InDegree position: 0 size: 5 R> degree(graph, name = "InOutDegree", variant = "all") oaa.cursor over: ID, InOutDegree position: 0 size: 5 R> R> #-- Create a cursor including the degree properties R> R> cursor <- oaa.cursor(graph, c("OutDegree", "InOutDegree", "InDegree"), "nodes") R> oaa.next(cursor, 5) OutDegree InOutDegree InDegree 1 1 2 1 2 1 3 2 3 2 3 1 4 1 2 1 5 0 0 0 R> R> #-- Create a cursor over the degree properties using R> #-- the PGX SQL-like query language PGQL R> R> cursor <- oaa.cursor(graph, + query = "select n.OutDegree, n.InOutDegree, n.InDegree + where (n) order by n.OutDegree desc") R> #-- View the first 5 entries from the cursor R> R> oaa.next(cursor, 5) n.OutDegree n.InOutDegree n.InDegree 1 2 3 1 2 1 3 2 3 1 2 1 4 1 2 1 5 0 0 0 R> R> #-- See the results from the pagerank algorithm R> R> pagerankCursor <- pagerank(graph, 0.085, 0.1, 100) R> oaa.next(pagerankCursor, 5) pagerank 2 0.22 3 0.20 1 0.19 4 0.19 5 0.18 R> R> #-- Create a cursor over the pagerank property using PGQL R> R> cursor <- oaa.cursor(graph, + query = "select n.pagerank where (n) + order by n.pagerank desc") R> R> oaa.next(cursor, 5) n.pagerank 1 0.22 2 0.20 3 0.19 4 0.19 5 0.18 R> R> #-- You can create a cursor using the R interface as well R> R> cursor <- 
oaa.cursor(graph, "pagerank", ordering = "desc") R> R> oaa.next(cursor, 5) pagerank 1 0.19 2 0.22 3 0.20 4 0.19 5 0.18 R> R> #-- Compute the adamic adar index for edges R> R> topEdges <- adamicAdarCounting(graph) R> oaa.next(topEdges) adamic_adar 0 0 1 0 2 0 3 0 4 0 R> R> #-- List any graph snapshots available R> R> oaa.graphSnapshotList() [1] "ANONYMOUS_GRAPH_1" "CONNECTIONS" "EXAMPLE_GRAPH" [4] "GRAPH1" "GRAPH_EXPORT_LABELED" "G_160317161147" [7] "G_160317201914" "MYAWESOMEGRAPH" "MYEXAMPLEGRAPH" [10] "MY_GRAPH1" "SAMPLE" "SAMPLE_GRAPH" [13] "SF" "SF_MUTATION" R> R> #-- Export a binary snapshot of the whole graph into Oracle Database R> #-- and view the listing again R> R> oaa.graphSnapshotPersist(graph, nodeProperties = TRUE, edgeProperties = TRUE) R> oaa.graphSnapshotList() [1] "ANONYMOUS_GRAPH_1" "CONNECTIONS" "EXAMPLE_GRAPH" [4] "GRAPH1" "GRAPH_EXPORT_LABELED" "G_160317161147" [7] "G_160317201914" "MYAWESOMEGRAPH" "MYEXAMPLEGRAPH" [10] "MYPGXGRAPH" "MY_GRAPH1" "SAMPLE" [13] "SAMPLE_GRAPH" "SF" "SF_MUTATION" R> R> #-- Read the snapshot back into memory R> R> graph2 <- oaa.graphSnapshot("myPgxGraph") R> R> #-- Export the graph nodes and specific node properties from memory R> #-- into a database table R> R> oaa.create(graph2, nodeTableName = "RANKED_NODES", nodeProperties = TRUE) R> R> #-- Export both nodes and edges as tables from memory into the database, R> #-- but only export the pagerank node property R> R> oaa.create(graph2, nodeTableName = "RANKED_GRAPH_N", + nodeProperties = c("NP1", "pagerank"), + edgeTableName = "RANKED_GRAPH_E") R> R> #-- Export the graph edges and their properties from memory into a database table R> R> oaa.create(graph2, edgeTableName = "RANKED_EDGES", edgeProperties = TRUE) R> R> #-- Free the graphs at the PGX server R> R> oaa.rm(graph) R> oaa.rm(graph2) R> R> #-- Clean up the tables created by this example R> R> ore.drop("MY_NODES") R> ore.drop("MY_EDGES") R> ore.drop("RANKED_NODES") R> ore.drop("RANKED_GRAPH_N") R> 
ore.drop("RANKED_GRAPH_E") R> ore.drop("RANKED_EDGES") R> R> oaa.dropSnapshots("myPgxGraph")