FLOAT32ベクトル・ジェネレータを使用したSQLクイック・スタート
ベクトル・ジェネレータを作成するPL/SQLプログラムが、問合せおよび結果の例とともに含まれ、ベクトル埋込みモデルを使用せずにOracle AI Vector Searchを簡単に開始できます。
ジェネレータは、指定した数のディメンションおよびクラスタでベクトルをランダムに生成できるPL/SQLプログラムです。各次元値は、指定の最小値と最大値を使用して生成されます。生成プロセスの出力は、
genvec
という表の移入で、これを使用するなどして、類似検索を試すことができます。
次の手順では、十分な権限(最小でDB_DEVELOPER_ROLE
ロール)を持つデータベース・アカウントへのアクセス権がすでにあることを前提としています。
ノート:
本番データベースでベクトル・ジェネレータを使用しないでください。このプログラムは、テストおよびデモの目的で使用可能になります。-
genvec
表を作成します。DROP TABLE genvec PURGE; CREATE TABLE genvec ( id number, -- id of the generated vector v VECTOR, -- generated vector name VARCHAR2(500), -- name for the generated vector: C1 to Cn are centroids, Cx_y is vector number y in cluster number x nv VECTOR, -- normalized version of the generated vector ly number -- random number you can use to filter out rows in addition to similarity search on vectors );
- パッケージ
vector_gen_pkg
とその関連パッケージ本体を作成します。ここでは、ベクトル・ジェネレータ・パッケージを作成します。
CREATE OR REPLACE PACKAGE vector_gen_pkg AS TYPE t_vectors IS TABLE OF vector INDEX BY PLS_INTEGER; FUNCTION get_coordinate( input_string CLOB, i PLS_INTEGER ) RETURN NUMBER; PROCEDURE generate_vectors( num_vectors IN PLS_INTEGER, -- Number of vectors to generate dimensions IN PLS_INTEGER, -- Number of dimensions of each vector num_clusters IN PLS_INTEGER, -- Number of clusters to create cluster_spread IN NUMBER, -- Relative closeness of each vector in each cluster (using standard deviation) min_value IN NUMBER, -- Minimum value for a vector coordinate max_value IN NUMBER -- Maximum value for a vector coordinate ); END vector_gen_pkg; /
また、パッケージ本体を作成します。
CREATE OR REPLACE PACKAGE BODY vector_gen_pkg AS ------------------------------------------- ------------------------------------------- ---- V E C T O R G E N E R A T O R ----- ------------------------------------------- ------------------------------------------- -- Version 1.0 --------- ------------------------------------------- ------------------------------------------- ---- DO NOT USE ON PRODUCTION DATABASES --- ---- ONLY FOR TESTING AND DEMO PURPOSES --- ------------------------------------------- FUNCTION get_coordinate( input_string CLOB, i PLS_INTEGER ) RETURN NUMBER IS start_pos NUMBER; end_pos NUMBER; comma_pos NUMBER; coord VARCHAR2(100); comma_count NUMBER := 0; commas NUMBER; working_string CLOB; BEGIN -- Remove leading and trailing brackets working_string := input_string; working_string := TRIM(BOTH '[]' FROM working_string); commas := LENGTH(working_string) - LENGTH(REPLACE(working_string, ',', '')); -- Initialize positions start_pos := 1; end_pos := INSTR(working_string, ',', start_pos); IF i<=0 OR i>commas+1 THEN RETURN NULL; END IF; -- Loop through the string to find the i-th coordinate LOOP IF comma_count + 1 = i THEN IF end_pos = 0 THEN -- If there's no more comma, the coordinate is the rest of the string coord := SUBSTR(working_string, start_pos); ELSE coord := SUBSTR(working_string, start_pos, end_pos - start_pos); END IF; RETURN coord; END IF; -- Move to the next coordinate comma_count := comma_count + 1; start_pos := end_pos + 1; end_pos := INSTR(working_string, ',', start_pos); -- Exit loop if no more coordinates EXIT WHEN start_pos > LENGTH(working_string); END LOOP; -- If the function hasn't returned yet, the index was out of bounds RETURN NULL; END; PROCEDURE generate_random_vector( dimensions IN PLS_INTEGER, min_value IN NUMBER, max_value IN NUMBER, vec OUT vector ) IS e CLOB; BEGIN e := '['; FOR i IN 1..dimensions-1 LOOP e := e || DBMS_RANDOM.VALUE(min_value, max_value) ||','; END LOOP; e := e || DBMS_RANDOM.VALUE(min_value, max_value) ||']'; vec := VECTOR(e); END generate_random_vector; PROCEDURE generate_clustered_vector( centroid IN vector, cluster_spread IN NUMBER, vec OUT vector ) IS e CLOB; d number; BEGIN d := VECTOR_DIMENSION_COUNT(centroid); e := '['; FOR i IN 1 .. d-1 LOOP e := e || (get_coordinate(to_clob(centroid),i) + (DBMS_RANDOM.NORMAL * cluster_spread)) ||','; END LOOP; e := e || (get_coordinate(to_clob(centroid),VECTOR_DIMENSION_COUNT(centroid)) + (DBMS_RANDOM.NORMAL * cluster_spread)) || ']'; vec := VECTOR(e); END generate_clustered_vector; FUNCTION normalize_vector(vec IN vector) RETURN vector IS e CLOB; v CLOB; n number; d number; BEGIN n := VECTOR_NORM(vec); v := to_clob(vec); d := VECTOR_DIMENSION_COUNT(vec); e := '['; FOR i IN 1 .. d-1 LOOP e := e || (get_coordinate(v,i)/n) ||','; END LOOP; e := e || (get_coordinate(v,d)/n) || ']'; RETURN VECTOR(e); END normalize_vector; PROCEDURE generate_vectors( num_vectors IN PLS_INTEGER, -- Must be 1 or above dimensions IN PLS_INTEGER, -- Must be above 1 but less than 500 num_clusters IN PLS_INTEGER, -- Must be 1 or above cluster_spread IN NUMBER, -- Must be grather than 0 min_value IN NUMBER, max_value IN NUMBER ) IS centroids t_vectors; vectors_per_cluster PLS_INTEGER; remaining_vectors PLS_INTEGER; vec vector; idx PLS_INTEGER := 1; max_id NUMBER; working_vector VECTOR; BEGIN IF (num_vectors) <=0 OR (num_clusters < 1) OR (num_vectors < num_clusters) OR (dimensions <= 0) OR (dimensions > 500) OR (cluster_spread <= 0) OR (min_value >= max_value) THEN RETURN; END IF; SELECT MAX(id) INTO max_id FROM genvec; IF max_id IS NULL THEN max_id := 0; END IF; -- Generate cluster centroids FOR i IN 1..num_clusters LOOP generate_random_vector(dimensions, min_value, max_value, centroids(i)); working_vector := normalize_vector(centroids(i)); INSERT INTO genvec VALUES (max_id + idx, centroids(i), 'C'||i, working_vector, DBMS_RANDOM.VALUE(3,600000000)); idx := idx + 1; END LOOP; -- Calculate vectors per cluster vectors_per_cluster := TRUNC(num_vectors / num_clusters); remaining_vectors := num_vectors MOD num_clusters; -- Generate vectors for each cluster IF vectors_per_cluster > 1 THEN FOR i IN 1..num_clusters LOOP FOR j IN 1..(vectors_per_cluster - 1) LOOP generate_clustered_vector(centroids(i), cluster_spread, vec); working_vector := normalize_vector(vec); INSERT INTO genvec VALUES (max_id + idx, vec, 'C'||i||'-'||j, working_vector, DBMS_RANDOM.VALUE(3,600000000)); idx := idx + 1; END LOOP; END LOOP; END IF; -- Handle remaining vectors: all associated with cluster 1 IF remaining_vectors > 0 THEN FOR j IN 1..remaining_vectors LOOP generate_clustered_vector(centroids(1), cluster_spread, vec); working_vector := normalize_vector(vec); INSERT INTO genvec VALUES (max_id + idx, vec, 'C1-'||idx, working_vector, DBMS_RANDOM.VALUE(3,600000000)); idx := idx + 1; END LOOP; END IF; COMMIT; END generate_vectors; END vector_gen_pkg; /
- ベクトル・ジェネレータを設定したら、これと次の手順を実行して、その使用方法を理解できます。
サンプル値を使用して、
vector_gen_pkg
パッケージのgenerate_vectors
プロシージャを実行します。BEGIN vector_gen_pkg.generate_vectors( num_vectors => 100, -- Number of vectors to generate. Must be 1 or above dimensions => 3, -- Number of dimensions of each vector. Must be above 1 but less than 500 num_clusters => 6, -- Number of clusters to create. Must be 1 or above cluster_spread => 1, -- Relative closeness of each vector in each cluster (using standard deviation). Must be grather than 0 min_value => 0, -- Minimum value for a vector coordinate max_value => 100 -- Maximum value for a vector coordinate. Min value must be smaller than max value ); END; /
SELECT
文を実行して、新しく生成されたベクトルを表示します。SELECT name, v FROM genvec;
出力例:
NAME V _______ ____________________________________________________ C1 [6.35792809E+001,5.28954163E+001,5.16500435E+001] C2 [5.67991257E+001,5.00640755E+001,2.3642437E+001] C3 [2.42510891E+001,5.36970367E+001,6.88145638E+000] C4 [8.13146515E+001,2.88190498E+001,4.09245186E+001] C5 [6.70646744E+001,2.53395119E+001,6.14522667E+001] C6 [5.60192604E+001,8.31662598E+001,2.93592377E+001] C1-1 [6.4469986E+001,5.25044632E+001,5.22250557E+001] C1-2 [6.31295433E+001,5.21443062E+001,5.02242126E+001] C1-3 [6.25154915E+001,5.25730362E+001,5.2617794E+001] C1-4 [6.3491375E+001,5.21440697E+001,5.06722069E+001] C1-5 [6.21516266E+001,5.32161064E+001,5.23233032E+001] C1-6 [6.2913269E+001,5.16970291E+001,5.16683655E+001] C1-7 [6.22267456E+001,5.40408363E+001,5.0272541E+001] C1-8 [6.1414093E+001,5.28870888E+001,5.27458E+001] NAME V ________ ____________________________________________________ C1-9 [6.29652252E+001,5.32767754E+001,5.27030106E+001] C1-10 [6.35940704E+001,5.27265244E+001,5.23180656E+001] C1-11 [6.34133224E+001,5.39401283E+001,5.29368248E+001] C1-12 [6.18856697E+001,5.31113129E+001,5.18861504E+001] C1-13 [6.32378883E+001,5.30308647E+001,5.04571724E+001] C1-14 [6.18148689E+001,5.33705482E+001,5.29123802E+001] C1-15 [6.43224258E+001,5.23124084E+001,5.21299057E+001] C2-1 [5.59053535E+001,5.20054626E+001,2.28595486E+001] C2-2 [5.71644516E+001,5.13243408E+001,2.31167526E+001] C2-3 [5.66626244E+001,5.00615959E+001,2.27138176E+001] C2-4 [5.73383865E+001,5.04509125E+001,2.36539135E+001] C2-5 [5.6621357E+001,5.01576767E+001,2.38867531E+001] C2-6 [5.59768562E+001,5.17590942E+001,2.49088764E+001] C2-7 [5.64437904E+001,4.71531525E+001,2.23245487E+001] NAME V ________ ____________________________________________________ C2-8 [5.81449051E+001,5.09049644E+001,2.29072056E+001] C2-9 [5.37190132E+001,4.87386665E+001,2.28188381E+001] C2-10 [5.77416382E+001,4.93461685E+001,2.32014389E+001] C2-11 [5.68353958E+001,5.11093979E+001,2.43693123E+001] C2-12 [5.79631157E+001,5.0297657E+001,2.28039799E+001] C2-13 [5.57930183E+001,5.11965866E+001,2.35887661E+001] C2-14 [5.57345848E+001,5.03228951E+001,2.30780907E+001] C2-15 [5.69435997E+001,4.8590435E+001,2.58747597E+001] C3-1 [2.40239315E+001,5.2352993E+001,5.63517284E+000] C3-2 [2.39717846E+001,5.30635986E+001,5.86633539E+000] C3-3 [2.70314407E+001,5.48788643E+001,7.96345377E+000] C3-4 [2.39875908E+001,5.39634552E+001,5.87654877E+000] C3-5 [2.47772026E+001,5.2187336E+001,6.83652115E+000] C3-6 [2.32920208E+001,5.41494293E+001,6.40737772E+000] NAME V ________ ____________________________________________________ C3-7 [2.46129742E+001,5.32308769E+001,6.29999685E+000] C3-8 [2.51000671E+001,5.33271561E+001,8.86797047E+000] C3-9 [2.4337059E+001,5.26281281E+001,6.9616766E+000] C3-10 [2.39770508E+001,5.42386856E+001,5.63018417E+000] C3-11 [2.59837551E+001,5.34013176E+001,6.97773361E+000] C3-12 [2.40400314E+001,5.25649719E+001,7.2636981E+000] C3-13 [2.13184013E+001,5.28633308E+001,8.3834734E+000] C3-14 [2.50075855E+001,5.21548729E+001,6.88196087E+000] C3-15 [2.53695087E+001,5.60495186E+001,6.76059389E+000] C4-1 [8.28819885E+001,2.95163822E+001,4.03809738E+001] C4-2 [8.18269348E+001,2.95735188E+001,3.99435768E+001] C4-3 [8.2709259E+001,2.90755043E+001,4.07345886E+001] C4-4 [8.18622665E+001,2.88013916E+001,4.1822567E+001] C4-5 [7.99165421E+001,2.89941139E+001,4.09653854E+001] NAME V ________ ____________________________________________________ C4-6 [8.12936707E+001,2.98655643E+001,4.00380211E+001] C4-7 [8.21705704E+001,2.90163479E+001,3.94858704E+001] C4-8 [8.20081329E+001,2.89751148E+001,4.1045887E+001] C4-9 [8.25486298E+001,2.84143009E+001,4.15654945E+001] C4-10 [8.22034149E+001,2.92223415E+001,4.20033302E+001] C4-11 [8.2048996E+001,2.98751106E+001,4.09612732E+001] C4-12 [8.09316025E+001,2.7799057E+001,4.12611198E+001] C4-13 [8.04624023E+001,2.88711109E+001,4.07331085E+001] C4-14 [8.13773575E+001,2.97510109E+001,4.09169846E+001] C4-15 [8.35310364E+001,2.971031E+001,4.16878052E+001] C5-1 [6.87114258E+001,2.53504581E+001,6.11055298E+001] C5-2 [6.73569031E+001,2.35163498E+001,6.01617432E+001] C5-3 [6.78224869E+001,2.61236534E+001,6.0729248E+001] C5-4 [6.76432266E+001,2.56426888E+001,6.35400085E+001] NAME V ________ ____________________________________________________ C5-5 [6.75377045E+001,2.60873699E+001,6.35584145E+001] C5-6 [6.84944687E+001,2.51576214E+001,6.24934502E+001] C5-7 [6.79246216E+001,2.53722992E+001,6.32098122E+001] C5-8 [6.84075165E+001,2.63778133E+001,6.10950584E+001] C5-9 [6.73214798E+001,2.70551453E+001,6.27835197E+001] C5-10 [6.50006485E+001,2.67408028E+001,6.07828026E+001] C5-11 [6.68869705E+001,2.3982399E+001,6.13440819E+001] C5-12 [6.55524521E+001,2.42231808E+001,6.07235756E+001] C5-13 [6.72140808E+001,2.42842178E+001,6.21546478E+001] C5-14 [6.89587936E+001,2.67715569E+001,6.08621559E+001] C5-15 [6.68405685E+001,2.44039059E+001,6.12652893E+001] C6-1 [5.4925251E+001,8.28179474E+001,3.0869236E+001] C6-2 [5.52922363E+001,8.23375549E+001,2.94804363E+001] C6-3 [5.60466652E+001,8.18454132E+001,2.99774895E+001] NAME V ________ ____________________________________________________ C6-4 [5.74460373E+001,8.26830368E+001,2.86887722E+001] C6-5 [5.57439041E+001,8.14622726E+001,2.94924259E+001] C6-6 [5.4913372E+001,8.48766251E+001,2.92711105E+001] C6-7 [5.66876144E+001,8.25907898E+001,2.84199276E+001] C6-8 [5.6253479E+001,8.3280838E+001,2.69524212E+001] C6-9 [5.50792351E+001,8.37676392E+001,3.08755417E+001] C6-10 [5.57719955E+001,8.11036758E+001,2.92569256E+001] C6-11 [5.60834808E+001,8.3103096E+001,3.09748001E+001] C6-12 [5.58962059E+001,8.3612648E+001,2.95026093E+001] C6-13 [5.73348083E+001,8.26950226E+001,2.88242455E+001] C6-14 [5.45099411E+001,8.33315659E+001,2.90559101E+001] C6-15 [5.57930641E+001,8.5720871E+001,2.92863998E+001] C1-97 [6.3716671E+001,5.41518326E+001,5.18371048E+001] C1-98 [6.58774261E+001,5.32223206E+001,5.05089798E+001] NAME V _________ ____________________________________________________ C1-99 [6.31867676E+001,5.25712204E+001,5.16621819E+001] C1-100 [6.20503845E+001,5.15550919E+001,5.08155479E+001] 100 rows selected.
結果のベクトルを空間に描写するには、次のグラフを考えてみてください:
genvec
表で、生成されたベクトルに対して類似検索を実行します。最初に、問合せでベクトルの名前を形成するために使用する
cluster_number
という変数を定義します。DEFINE cluster_number = '&clusterid'
clusterid
の値を入力するよう求められます。この例では、5
を使用します。Enter value for clusterid: 5
次の問合せを実行して、生成されたベクトルに対して類似検索を実行します。
SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v,(SELECT v FROM genvec WHERE name='C'||'&cluster_number'),EUCLIDEAN) FETCH EXACT FIRST 20 ROWS ONLY;
出力例:
NAME ________ C5 C5-15 C5-13 C5-3 C5-11 C5-1 C5-8 C5-6 C5-7 C5-12 C5-9 C5-4 C5-2 C5-5 NAME ________ C5-14 C5-10 C4-5 C4-12 C4-4 C4-13 20 rows selected.
- 次に、今回はコサイン距離メトリックを使用して、生成されたベクトルで類似検索を実行する別の例を示します。
SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v,(SELECT v FROM genvec WHERE name='C'||'&cluster_number'),COSINE) FETCH EXACT FIRST 20 ROWS ONLY;
出力例:
NAME ________ C5 C5-6 C5-12 C5-15 C5-7 C5-4 C5-5 C5-13 C5-11 C5-3 C5-1 C5-8 C5-9 C5-2 NAME ________ C5-14 C5-10 C4-5 C4-4 C4-10 C4-12 20 rows selected.
query_vector
という変数を作成し、SELECT INTO
を使用して変数にベクトル値を格納します。VARIABLE query_vector CLOB BEGIN SELECT v INTO :query_vector FROM genvec WHERE name='C'||'&cluster_number'; END; / PRINT query_vector;
出力例:
QUERY_VECTOR -------------------------------------------------------------------- [6.70646744E+001,2.53395119E+001,6.14522667E+001]
- 前のステップで作成した問合せベクトルを使用して、類似検索の実行計画を作成します。
EXPLAIN PLAN FOR SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN) FETCH EXACT FIRST 20 ROWS ONLY; SELECT plan_table_output FROM table(dbms_xplan.display('plan_table',null,'all'));
出力例:PLAN_TABLE_OUTPUT _____________________________________________________________________________________ Plan hash value: 1549136425 ---------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ---------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 20 | 5040 | 4 (25)| 00:00:01 | |* 1 | COUNT STOPKEY | | | | | | | 2 | VIEW | | 100 | 25200 | 4 (25)| 00:00:01 | |* 3 | SORT ORDER BY STOPKEY| | 100 | 25800 | 4 (25)| 00:00:01 | | 4 | TABLE ACCESS FULL | GENVEC | 100 | 25800 | 3 (0)| 00:00:01 | ---------------------------------------------------------------------------------- Query Block Name / Object Alias (identified by operation id): ------------------------------------------------------------- PLAN_TABLE_OUTPUT ______________________________________________________________ 1 - SEL$2 2 - SEL$1 / "from$_subquery$_002"@"SEL$2" 3 - SEL$1 4 - SEL$1 / "GENVEC"@"SEL$1" Predicate Information (identified by operation id): --------------------------------------------------- 1 - filter(ROWNUM<=20) 3 - filter(ROWNUM<=20) Column Projection Information (identified by operation id): ----------------------------------------------------------- PLAN_TABLE_OUTPUT __________________________________________________________________________ 1 - "from$_subquery$_002"."NAME"[VARCHAR2,500] 2 - "from$_subquery$_002"."NAME"[VARCHAR2,500] 3 - (#keys=1) VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8], "NAME"[VARCHAR2,500] 4 - "NAME"[VARCHAR2,500], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8] Note ----- - dynamic statistics used: dynamic sampling (level=2) 41 rows selected.
- Hierarchical Navigable Small World (HNSW)索引を作成します。
CREATE VECTOR INDEX genvec_hnsw_idx ON genvec(v) ORGANIZATION INMEMORY NEIGHBOR GRAPH DISTANCE EUCLIDEAN WITH TARGET ACCURACY 95; SELECT INDEX_NAME, INDEX_TYPE, INDEX_SUBTYPE FROM USER_INDEXES;
出力例:
INDEX_NAME INDEX_TYPE INDEX_SUBTYPE ___________________________ _____________ _______________________________ DM$CEDOC_MODEL NORMAL SYS_IL0000073592C00002$$ LOB GENVEC_HNSW_IDX VECTOR INMEMORY_NEIGHBOR_GRAPH_HNSW SYS_C008694 NORMAL 4 rows selected.
VECSYS.VECTOR$INDEX
ビューからHNSW索引に関する情報を問い合せます。SELECT JSON_SERIALIZE(IDX_PARAMS RETURNING VARCHAR2 PRETTY) FROM VECSYS.VECTOR$INDEX WHERE IDX_NAME = 'GENVEC_HNSW_IDX';
出力例:
JSON_SERIALIZE(IDX_PARAMSRETURNINGVARCHAR2PRETTY) ______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________ { "type" : "HNSW", "num_neighbors" : 32, "efConstruction" : 300, "distance" : "EUCLIDEAN", "accuracy" : 95, "vector_type" : "FLOAT32", "vector_dimension" : 3, "degree_of_parallelism" : 1, "pdb_id" : 3, "indexed_col" : "V" }
VECSYS.VECTOR$INDEX
ビューの列の詳細は、VECSYS.VECTOR$INDEXを参照してください。- HNSW索引を作成したら、
genvec
表で類似検索用の別の実行計画を作成します。EXPLAIN PLAN FOR SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN) FETCH APPROX FIRST 20 rows only; SELECT plan_table_output FROM table(dbms_xplan.display('plan_table',null,'all'));
出力例:
PLAN_TABLE_OUTPUT _____________________________________________________________________________________________________________ Plan hash value: 1202819565 ---------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time | ---------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 20 | 5040 | | 165 (2)| 00:00:01 | |* 1 | COUNT STOPKEY | | | | | | | | 2 | VIEW | | 100 | 25200 | | 165 (2)| 00:00:01 | |* 3 | SORT ORDER BY STOPKEY | | 100 | 425K| 808K| 165 (2)| 00:00:01 | | 4 | TABLE ACCESS BY INDEX ROWID| GENVEC | 100 | 425K| | 1 (0)| 00:00:01 | | 5 | VECTOR INDEX HNSW SCAN | GENVEC_HNSW_IDX | 100 | 425K| | 1 (0)| 00:00:01 | ---------------------------------------------------------------------------------------------------------- Query Block Name / Object Alias (identified by operation id): PLAN_TABLE_OUTPUT ________________________________________________________________ ------------------------------------------------------------- 1 - SEL$2 2 - SEL$1 / "from$_subquery$_002"@"SEL$2" 3 - SEL$1 4 - SEL$1 / "GENVEC"@"SEL$1" 5 - SEL$1 / "GENVEC"@"SEL$1" Predicate Information (identified by operation id): --------------------------------------------------- 1 - filter(ROWNUM<=20) 3 - filter(ROWNUM<=20) PLAN_TABLE_OUTPUT _____________________________________________________________________________________________________ Column Projection Information (identified by operation id): ----------------------------------------------------------- 1 - "from$_subquery$_002"."NAME"[VARCHAR2,500] 2 - "from$_subquery$_002"."NAME"[VARCHAR2,500] 3 - (#keys=1) VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8], "NAME"[VARCHAR2,500] 4 - "NAME"[VARCHAR2,500], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8] 5 - "GENVEC".ROWID[ROWID,10], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8] Note ----- PLAN_TABLE_OUTPUT ___________________________________________________________ - dynamic statistics used: dynamic sampling (level=2) 43 rows selected.
ご覧のとおり、実行計画にHNSW索引に関する情報が含まれるようになりました。
genvec
表のベクトルの類似検索を再度実行します。問合せ結果は、使用される索引付け方法に基づいて異なる場合があります。このシナリオに含まれる結果は、単なる例です。SELECT name FROM genvec ORDER BY vector_distance(v, :query_vector, EUCLIDEAN) FETCH APPROX FIRST 20 ROWS ONLY;
出力例:
NAME ________ C5 C5-15 C5-13 C5-3 C5-11 C5-1 C5-8 C5-6 C5-7 C5-12 C5-9 C5-4 C5-2 C5-5 NAME ________ C5-14 C5-10 C4-5 C4-12 C4-4 C4-13 20 rows selected.
- HNSW索引を削除し、Inverted File Flat (IVF)ベクトル索引を作成します。
DROP INDEX genvec_hnsw_idx; CREATE VECTOR INDEX genvec_ivf_idx ON genvec(v) ORGANIZATION NEIGHBOR PARTITIONS DISTANCE EUCLIDEAN WITH TARGET ACCURACY 95; SELECT JSON_SERIALIZE(IDX_PARAMS RETURNING VARCHAR2 PRETTY) FROM VECSYS.VECTOR$INDEX WHERE IDX_NAME = 'GENVEC_IVF_IDX';
出力例:
JSON_SERIALIZE(IDX_PARAMSRETURNINGVARCHAR2PRETTY) ________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________ { "target_centroids" : 40, "pdb_id" : 3, "vector_type" : "FLOAT32", "type" : "IVF_FLAT", "vector_dimension" : 3, "distance" : "EUCLIDEAN", "indexed_col" : "V", "min_vectors_per_partition" : 10, "degree_of_parallelism" : 1, "accuracy" : 95, "num_centroids" : 24, "samples_per_partition" : 256 }
- 再度、新しく作成されたIVF索引に関する情報が含まれる実行計画を作成します。
EXPLAIN PLAN FOR SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN) FETCH APPROX FIRST 20 ROWS ONLY; SELECT plan_table_output FROM table(dbms_xplan.display('plan_table',null,'all'));
出力例:
PLAN_TABLE_OUTPUT __________________________________________________________________________________________________________________________________________________________________________ Plan hash value: 2965029064 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 5 | 1260 | 29 (14)| 00:00:01 | | | | 1 | VIEW | | 5 | 1260 | 29 (14)| 00:00:01 | | | | 2 | SORT ORDER BY | | 5 | 21910 | 29 (14)| 00:00:01 | | | |* 3 | HASH JOIN | | 5 | 21910 | 28 (11)| 00:00:01 | | | | 4 | VIEW | VW_IVPSR_11E7D7DE | 20 | 320 | 24 (9)| 00:00:01 | | | |* 5 | COUNT STOPKEY | | | | | | | | | 6 | VIEW | VW_IVPSJ_578B79F1 | 25 | 450 | 24 (9)| 00:00:01 | | | |* 7 | SORT ORDER BY STOPKEY | | 25 | 550 | 24 (9)| 00:00:01 | | | |* 8 | HASH JOIN | | 25 | 550 | 23 (5)| 00:00:01 | | | PLAN_TABLE_OUTPUT __________________________________________________________________________________________________________________________________________________________________________ | 9 | PART JOIN FILTER CREATE | :BF0000 | 6 | 18 | 4 (25)| 00:00:01 | | | | 10 | VIEW | VW_IVCR_B5B87E67 | 6 | 18 | 4 (25)| 00:00:01 | | | |* 11 | COUNT STOPKEY | | | | | | | | | 12 | VIEW | VW_IVCN_9A1D2119 | 24 | 312 | 4 (25)| 00:00:01 | | | |* 13 | SORT ORDER BY STOPKEY | | 24 | 216 | 4 (25)| 00:00:01 | | | | 14 | TABLE ACCESS FULL | VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS | 24 | 216 | 3 (0)| 00:00:01 | | | | 15 | PARTITION LIST JOIN-FILTER| | 100 | 1900 | 3 (0)| 00:00:01 |:BF0000|:BF0000| | 16 | TABLE ACCESS FULL | VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROID_PARTITIONS | 100 | 1900 | 3 (0)| 00:00:01 |:BF0000|:BF0000| | 17 | TABLE ACCESS FULL | GENVEC | 100 | 426K| 3 (0)| 00:00:01 | | | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- Query Block Name / Object Alias (identified by operation id): ------------------------------------------------------------- PLAN_TABLE_OUTPUT ___________________________________________________________ 1 - SEL$94C0F189 / "from$_subquery$_002"@"SEL$2" 2 - SEL$94C0F189 4 - SEL$E731354C / "VW_IVPSR_11E7D7DE"@"SEL$1" 5 - SEL$E731354C 6 - SEL$0C00A749 / "VW_IVPSJ_578B79F1"@"SEL$E731354C" 7 - SEL$0C00A749 10 - SEL$700CE8F1 / "VW_IVCR_B5B87E67"@"SEL$0C00A749" 11 - SEL$700CE8F1 12 - SEL$E5326247 / "VW_IVCN_9A1D2119"@"SEL$700CE8F1" 13 - SEL$E5326247 14 - SEL$E5326247 / "VTIX_CENTRD"@"SEL$E5326247" 16 - SEL$0C00A749 / "VTIX_CNPART"@"SEL$0C00A749" 17 - SEL$94C0F189 / "GENVEC"@"SEL$1" PLAN_TABLE_OUTPUT ______________________________________________________________________________ Predicate Information (identified by operation id): --------------------------------------------------- 3 - access("GENVEC".ROWID="VW_IVPSR_11E7D7DE"."BASE_TABLE_ROWID") 5 - filter(ROWNUM<=20) 7 - filter(ROWNUM<=20) 8 - access("VW_IVCR_B5B87E67"."CENTROID_ID"="VTIX_CNPART"."CENTROID_ID") 11 - filter(ROWNUM<=6) 13 - filter(ROWNUM<=6) Column Projection Information (identified by operation id): ----------------------------------------------------------- 1 - "from$_subquery$_002"."NAME"[VARCHAR2,500] PLAN_TABLE_OUTPUT __________________________________________________________________________________________________________________________________________________________________ 2 - (#keys=1) "VEC_DIST"[BINARY_DOUBLE,8], "GENVEC"."NAME"[VARCHAR2,500] 3 - (#keys=1) "GENVEC"."NAME"[VARCHAR2,500], "VEC_DIST"[BINARY_DOUBLE,8], "GENVEC"."NAME"[VARCHAR2,500] 4 - "BASE_TABLE_ROWID"[ROWID,10], "VEC_DIST"[BINARY_DOUBLE,8] 5 - "VW_IVPSJ_578B79F1"."BASE_TABLE_ROWID"[ROWID,10], "VW_IVPSJ_578B79F1"."VEC_DIST"[BINARY_DOUBLE,8] 6 - "VW_IVPSJ_578B79F1"."BASE_TABLE_ROWID"[ROWID,10], "VW_IVPSJ_578B79F1"."VEC_DIST"[BINARY_DOUBLE,8] 7 - (#keys=1) VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8], "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10] 8 - (#keys=1) "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8] 9 - "VW_IVCR_B5B87E67"."CENTROID_ID"[NUMBER,22], "VW_IVCR_B5B87E67"."CENTROID_ID"[NUMBER,22] 10 - "CENTROID_ID"[NUMBER,22] 11 - "VW_IVCN_9A1D2119"."CENTROID_ID"[NUMBER,22] 12 - "VW_IVCN_9A1D2119"."CENTROID_ID"[NUMBER,22] 13 - (#keys=1) VECTOR_DISTANCE("VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS"."CENTROID_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * PLAN_TABLE_OUTPUT _________________________________________________________________________________________________________________________________________________________________ /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8], "VTIX_CENTRD"."CENTROID_ID"[NUMBER,22] 14 - "VTIX_CENTRD"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS"."CENTROID_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8] 15 - "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], "VTIX_CNPART"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8] 16 - "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], "VTIX_CNPART"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+ USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8] 17 - "GENVEC".ROWID[ROWID,10], "GENVEC"."NAME"[VARCHAR2,500] Note ----- - dynamic statistics used: dynamic sampling (level=2) - this is an adaptive plan 83 rows selected.
- 最後に、類似検索を再度実行します。
SELECT name FROM genvec ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN) FETCH APPROX FIRST 20 ROWS ONLY;
出力例:
NAME ________ C5 C5-15 C5-13 C5-3 C5-11 C5-1 C5-8 C5-6 C5-7 C5-12 C5-9 C5-4 C5-2 C5-5 NAME ________ C5-14 C5-10 C4-5 C4-12 C4-4 C4-13 20 rows selected.
親トピック: スタート・ガイド