7.10 拡張可能Rアルゴリズム・モデル
ore.odmRAlg
関数は、拡張可能Rアルゴリズム・モデルを作成します。
拡張可能Rアルゴリズムは、登録済Rスクリプトを使用して、Rモデルを構築、スコア付けおよび表示します。また、分類、回帰、クラスタリング、特徴抽出、属性評価および相関機械学習機能をサポートします。
ore.odmRAlg
関数の引数および関数の使用例の詳細は、help(ore.odmRAlg)
をコールしてください。
拡張可能Rアルゴリズム・モデルの設定
次の表に、拡張可能Rアルゴリズム・モデルに適用される設定を示します。
表7-9 拡張可能Rアルゴリズム・モデルの設定
設定名 | 設定値 | 説明 |
---|---|---|
|
|
Rアルゴリズムのマイニング・モデル作成ファンクションに対する、既存の登録済Rスクリプトの名前を指定します。Rスクリプトにより、トレーニング・データに対して最初に入力した引数のRファンクションが定義され、Rモデル・オブジェクトが戻されます。クラスタリングと特徴抽出のマイニング機能のモデル作成では、RモデルでR属性のdm$nclusおよびdm$nfeatを設定して、クラスタと特徴の数をそれぞれ指定する必要があります。
|
|
SELECT value param_name, ...FROM DUAL |
モデル作成ファンクションのオプションの入力パラメータに対する、数値および文字列のスカラーのリストを指定します。 |
|
|
既存の登録済Rスクリプトの名前を指定して、データをスコアリングします。スクリプトにより、対応する予測結果が含まれる |
|
|
スコアリングの各属性の重み(コントリビューション)を計算するRアルゴリズムに対する、既存の登録済Rスクリプトの名前を指定します。スクリプトにより、各属性の影響を与える重みが行内に含まれる |
RALG_DETAILS_FUNCTION |
R_DETAILS_FUNCTION_SCRIPT_NAME |
モデル情報を生成するRアルゴリズムに対する、既存の登録済Rスクリプトの名前を指定します。モデル・ビューの生成には、この設定が必要です。 |
RALG_DETAILS_FORMAT |
SELECT type_value column_name,FROM DUAL |
生成されるモデル・ビューの出力列タイプと列名の数値および文字列のスカラーのリストに対する、SELECT 問合せを指定します。モデル・ビューの生成には、この設定が必要です。
|
例7-9 ore.odmRAlg関数の使用方法
library(OREembed)
digits <- getOption("digits")
options(digits = 5L)
IRIS <- ore.push(iris)
# Regression with glm
ore.scriptCreate("glm_build",
function(data, form, family)
glm(formula = form, data = data, family = family))
ore.scriptCreate("glm_score",
function(mod, data)
{ res <- predict(mod, newdata = data);
data.frame(res) })
ore.scriptCreate("glm_detail", function(mod)
data.frame(name=names(mod$coefficients),
coef=mod$coefficients))
ore.scriptList(name = "glm_build")
ore.scriptList(name = "glm_score")
ore.scriptList(name = "glm_detail")
ralg.glm <- ore.odmRAlg(IRIS, mining.function = "regression",
formula = c(form="Sepal.Length ~ ."),
build.function = "glm_build",
build.parameter = list(family="gaussian"),
score.function = "glm_score",
detail.function = "glm_detail",
detail.value = data.frame(name="a", coef=1))
summary(ralg.glm)
predict(ralg.glm, newdata = head(IRIS), supplemental.cols = "Sepal.Length")
ore.scriptDrop(name = "glm_build")
ore.scriptDrop(name = "glm_score")
ore.scriptDrop(name = "glm_detail")
# Classification with nnet
ore.scriptCreate("nnet_build",
function(dat, form, sz){
require(nnet);
set.seed(1234);
nnet(formula = formula(form), data = dat,
size = sz, linout = TRUE, trace = FALSE);
},
overwrite = TRUE)
ore.scriptCreate("nnet_detail", function(mod)
data.frame(conn = mod$conn, wts = mod$wts),
overwrite = TRUE)
ore.scriptCreate("nnet_score",
function(mod, data) {
require(nnet);
res <- data.frame(predict(mod, newdata = data));
names(res) <- sort(mod$lev); res
})
ralg.nnet <- ore.odmRAlg(IRIS, mining.function = "classification",
formula = c(form="Species ~ ."),
build.function = "nnet_build",
build.parameter = list(sz=2),
score.function = "nnet_score",
detail.function = "nnet_detail",
detail.value = data.frame(conn=1, wts =1))
summary(ralg.nnet)
predict(ralg.nnet, newdata = head(IRIS), supplemental.cols = "Species")
ore.scriptDrop(name = "nnet_build")
ore.scriptDrop(name = "nnet_score")
ore.scriptDrop(name = "nnet_detail")
# Feature extraction with pca
# Feature extraction with pca
ore.scriptCreate("pca_build",
function(dat){
mod <- prcomp(dat, retx = FALSE)
attr(mod, "dm$nfeat") <- ncol(mod$rotation)
mod},
overwrite = TRUE)
ore.scriptCreate("pca_score",
function(mod, data) {
res <- predict(mod, data)
as.data.frame(res)},
overwrite=TRUE)
ore.scriptCreate("pca_detail",
function(mod) {
rotation_t <- t(mod$rotation)
data.frame(id = seq_along(rownames(rotation_t)),
rotation_t)},
overwrite = TRUE)
X <- IRIS[, -5L]
ralg.pca <- ore.odmRAlg(X,
mining.function = "feature_extraction",
formula = NULL,
build.function = "pca_build",
score.function = "pca_score",
detail.function = "pca_detail",
detail.value = data.frame(Feature.ID=1,
ore.pull(head(X,1L))))
summary(ralg.pca)
head(cbind(X, Pred = predict(ralg.pca, newdata = X)))
ore.scriptDrop(name = "pca_build")
ore.scriptDrop(name = "pca_score")
ore.scriptDrop(name = "pca_detail")
options(digits = digits)
この例のリスト
R> library(OREembed)
R>
R> digits <- getOption("digits")
R> options(digits = 5L)
R>
R> IRIS <- ore.push(iris)
R>
R> # Regression with glm
R> ore.scriptCreate("glm_build",
+ function(data, form, family)
+ glm(formula = form, data = data, family = family))
R>
R> ore.scriptCreate("glm_score",
+ function(mod, data)
+ { res <- predict(mod, newdata = data);
+ data.frame(res) })
R>
R> ore.scriptCreate("glm_detail", function(mod)
+ data.frame(name=names(mod$coefficients),
+ coef=mod$coefficients))
R>
R> ore.scriptList(name = "glm_build")
NAME SCRIPT
1 glm_build function (data, form, family) \nglm(formula = form, data = data, family = family)
R> ore.scriptList(name = "glm_score")
NAME SCRIPT
1 glm_score function (mod, data) \n{\n res <- predict(mod, newdata = data)\n data.frame(res)\n}
R> ore.scriptList(name = "glm_detail")
NAME SCRIPT
1 glm_detail function (mod) \ndata.frame(name = names(mod$coefficients), coef = mod$coefficients)
R>
R> ralg.glm <- ore.odmRAlg(IRIS, mining.function = "regression",
+ formula = c(form="Sepal.Length ~ ."),
+ build.function = "glm_build",
+ build.parameter = list(family="gaussian"),
+ score.function = "glm_score",
+ detail.function = "glm_detail",
+ detail.value = data.frame(name="a", coef=1))
R>
R> summary(ralg.glm)
Call:
ore.odmRAlg(data = IRIS, mining.function = "regression", formula = c(form = "Sepal.Length ~ ."),
build.function = "glm_build", build.parameter = list(family = "gaussian"),
score.function = "glm_score", detail.function = "glm_detail",
detail.value = data.frame(name = "a", coef = 1))
Settings:
value
odms.missing.value.treatment odms.missing.value.auto
odms.sampling odms.sampling.disable
prep.auto OFF
build.function OML_USER.glm_build
build.parameter select 'Sepal.Length ~ .' "form", 'gaussian' "family" from dual
details.format select cast('a' as varchar2(4000)) "name", 1 "coef" from dual
details.function OML_USER.glm_detail
score.function OML_USER.glm_score
name coef
1 (Intercept) 2.17127
2 Petal.Length 0.82924
3 Petal.Width -0.31516
4 Sepal.Width 0.49589
5 Speciesversicolor -0.72356
6 Speciesvirginica -1.02350
R> predict(ralg.glm, newdata = head(IRIS), supplemental.cols = "Sepal.Length")
Sepal.Length PREDICTION
1 5.1 5.0048
2 4.9 4.7568
3 4.7 4.7731
4 4.6 4.8894
5 5.0 5.0544
6 5.4 5.3889
R>
R> ore.scriptDrop(name = "glm_build")
R> ore.scriptDrop(name = "glm_score")
R> ore.scriptDrop(name = "glm_detail")
R>
R> # Classification with nnet
R> ore.scriptCreate("nnet_build",
+ function(dat, form, sz){
+ require(nnet);
+ set.seed(1234);
+ nnet(formula = formula(form), data = dat,
+ size = sz, linout = TRUE, trace = FALSE);
+ },
+ overwrite = TRUE)
R>
R> ore.scriptCreate("nnet_detail", function(mod)
+ data.frame(conn = mod$conn, wts = mod$wts),
+ overwrite = TRUE)
R>
R> ore.scriptCreate("nnet_score",
+ function(mod, data) {
+ require(nnet);
+ res <- data.frame(predict(mod, newdata = data));
+ names(res) <- sort(mod$lev); res
+ })
R>
R> ralg.nnet <- ore.odmRAlg(IRIS, mining.function = "classification",
+ formula = c(form="Species ~ ."),
+ build.function = "nnet_build",
+ build.parameter = list(sz=2),
+ score.function = "nnet_score",
+ detail.function = "nnet_detail",
+ detail.value = data.frame(conn=1, wts =1))
R>
R> summary(ralg.nnet)
Call:
ore.odmRAlg(data = IRIS, mining.function = "classification",
formula = c(form = "Species ~ ."), build.function = "nnet_build",
build.parameter = list(sz = 2), score.function = "nnet_score",
detail.function = "nnet_detail", detail.value = data.frame(conn = 1,
wts = 1))
Settings:
value
clas.weights.balanced OFF
odms.missing.value.treatment odms.missing.value.auto
odms.sampling odms.sampling.disable
prep.auto OFF
build.function OML_USER.nnet_build
build.parameter select 'Species ~ .' "form", 2 "sz" from dual
details.format select 1 "conn", 1 "wts" from dual
details.function OML_USER.nnet_detail
score.function OML_USER.nnet_score
conn wts
1 0 1.46775
2 1 -12.88542
3 2 -4.38886
4 3 9.98648
5 4 16.57056
6 0 0.97809
7 1 -0.51626
8 2 -0.94815
9 3 0.13692
10 4 0.35104
11 0 37.22475
12 5 -66.49123
13 6 70.81160
14 0 -4.50893
15 5 7.01611
16 6 20.88774
17 0 -32.15127
18 5 58.92088
19 6 -91.96989
R> predict(ralg.nnet, newdata = head(IRIS), supplemental.cols = "Species")
Species PREDICTION PROBABILITY
1 setosa setosa 0.99999
2 setosa setosa 0.99998
3 setosa setosa 0.99999
4 setosa setosa 0.99998
5 setosa setosa 1.00000
6 setosa setosa 0.99999
R>
R> ore.scriptDrop(name = "nnet_build")
R> ore.scriptDrop(name = "nnet_score")
R> ore.scriptDrop(name = "nnet_detail")
R>
R> ore.scriptCreate("pca_build",
+ function(dat){
+ mod <- prcomp(dat, retx = FALSE)
+ attr(mod, "dm$nfeat") <- ncol(mod$rotation)
+ mod},
+ overwrite = TRUE)
R>
R> ore.scriptCreate("pca_score",
+ function(mod, data) {
+ res <- predict(mod, data)
+ as.data.frame(res)},
+ overwrite=TRUE)
R>
R> ore.scriptCreate("pca_detail",
+ function(mod) {
+ rotation_t <- t(mod$rotation)
+ data.frame(id = seq_along(rownames(rotation_t)),
+ rotation_t)},
+ overwrite = TRUE)
R>
R> X <- IRIS[, -5L]
R> ralg.pca <- ore.odmRAlg(X,
+ mining.function = "feature_extraction",
+ formula = NULL,
+ build.function = "pca_build",
+ score.function = "pca_score",
+ detail.function = "pca_detail",
+ detail.value = data.frame(Feature.ID=1,
+ ore.pull(head(X,1L))))
R>
R> summary(ralg.pca)
Call:
ore.odmRAlg(data = X, mining.function = "feature_extraction",
formula = NULL, build.function = "pca_build", score.function = "pca_score",
detail.function = "pca_detail", detail.value = data.frame(Feature.ID = 1,
ore.pull(head(X, 1L))))
Settings:
value
odms.missing.value.treatment odms.missing.value.auto
odms.sampling odms.sampling.disable
prep.auto OFF
build.function OML_USER.pca_build
details.format select 1 "Feature.ID", 5.1 "Sepal.Length", 3.5 "Sepal.Width", 1.4 "Petal.Length", 0.2 "Petal.Width" from dual
details.function OML_USER.pca_detail
score.function OML_USER.pca_score
Feature.ID Sepal.Length Sepal.Width Petal.Length Petal.Width
1 1 0.856671 0.358289 0.36139 -0.084523
2 2 -0.173373 -0.075481 0.65659 0.730161
3 3 0.076236 0.545831 -0.58203 0.597911
4 4 0.479839 -0.753657 -0.31549 0.319723
R> head(cbind(X, Pred = predict(ralg.pca, newdata = X)))
Sepal.Length Sepal.Width Petal.Length Petal.Width FEATURE_ID
1 5.1 3.5 1.4 0.2 2
2 4.9 3.0 1.4 0.2 4
3 4.7 3.2 1.3 0.2 3
4 4.6 3.1 1.5 0.2 4
5 5.0 3.6 1.4 0.2 2
6 5.4 3.9 1.7 0.4 2
R>
R> ore.scriptDrop(name = "pca_build")
R> ore.scriptDrop(name = "pca_score")
R> ore.scriptDrop(name = "pca_detail")
R>
R> options(digits = digits)