教師なし分類(クラスタリング)の例

9.7 教師なし分類(クラスタリング)の例

このSQL例では、コレクション表でドキュメントの小さなコレクションを作成し、CONTEXT索引を作成します。次に、ドキュメント割当て表およびクラスタ記述表を作成しますが、これらはCLUSTERINGプロシージャに対するコールを使用して移入されます。出力は、SELECT文を使用して表示されます。

set serverout on

/* collect document into a table */
create table collection (id number primary key, text varchar2(4000));
insert into collection values (1, 'Oracle Text can index any document or textual content.');
insert into collection values (2, 'Ultra Search uses a crawler to access documents.');
insert into collection values (3, 'XML is a tag-based markup language.');
insert into collection values (4, 'Oracle Database 11g XML DB treats XML 
as a native datatype in the database.');
insert into collection values (5, 'There are three Oracle Text index types to cover 
all text search needs.');
insert into collection values (6, 'Ultra Search also provides API 
for content management solutions.');

create index collectionx on collection(text) 
   indextype is ctxsys.context parameters('nopopulate');

/* prepare result tables, if you omit this step, procedure will create table automatically */
create table restab (       
       docid NUMBER,
       clusterid NUMBER,
       score NUMBER);

create table clusters (
       clusterid NUMBER,
       descript varchar2(4000),
       label varchar2(200),
       size   number,
       quality_score number,
       parent number);

/* set the preference */
exec ctx_ddl.drop_preference('my_cluster');
exec ctx_ddl.create_preference('my_cluster','KMEAN_CLUSTERING');
exec ctx_ddl.set_attribute('my_cluster','CLUSTER_NUM','3');

/* do the clustering */
exec ctx_output.start_log('my_log');
exec ctx_cls.clustering('collectionx','id','restab','clusters','my_cluster');
exec ctx_output.end_log;