FLOAT32ベクトル・ジェネレータを使用したSQLクイック・スタート

ベクトル・ジェネレータを作成するPL/SQLプログラムが、問合せおよび結果の例とともに含まれ、ベクトル埋込みモデルを使用せずにOracle AI Vector Searchを簡単に開始できます。

ジェネレータは、指定した数のディメンションおよびクラスタでベクトルをランダムに生成できるPL/SQLプログラムです。各次元値は、指定の最小値と最大値を使用して生成されます。生成プロセスの出力は、genvecという表の移入で、これを使用するなどして、類似検索を試すことができます。

次の手順では、十分な権限(最小でDB_DEVELOPER_ROLEロール)を持つデータベース・アカウントへのアクセス権がすでにあることを前提としています。

ノート:

本番データベースでベクトル・ジェネレータを使用しないでください。このプログラムは、テストおよびデモの目的で使用可能になります。

  1. genvec表を作成します。
    DROP TABLE genvec PURGE;
    
    CREATE TABLE genvec (
      id number,           -- id of the generated vector
      v VECTOR,            -- generated vector
      name VARCHAR2(500),  -- name for the generated vector: C1 to Cn are centroids, Cx_y is vector number y in cluster number x
      nv VECTOR,           -- normalized version of the generated vector
      ly number            -- random number you can use to filter out rows in addition to similarity search on vectors
    );
    
  2. パッケージvector_gen_pkgとその関連パッケージ本体を作成します。

    ここでは、ベクトル・ジェネレータ・パッケージを作成します。

    CREATE OR REPLACE PACKAGE vector_gen_pkg AS
    
      TYPE t_vectors IS TABLE OF vector INDEX BY PLS_INTEGER;
    
      FUNCTION get_coordinate(
        input_string CLOB,
        i PLS_INTEGER
      ) RETURN NUMBER;
    
      PROCEDURE generate_vectors(
        num_vectors IN PLS_INTEGER,  -- Number of vectors to generate
        dimensions IN PLS_INTEGER,   -- Number of dimensions of each vector
        num_clusters IN PLS_INTEGER, -- Number of clusters to create
        cluster_spread IN NUMBER,    -- Relative closeness of each vector in each cluster (using standard deviation)
        min_value IN NUMBER,         -- Minimum value for a vector coordinate
        max_value IN NUMBER          -- Maximum value for a vector coordinate
      );
    
    END vector_gen_pkg;
    /

    また、パッケージ本体を作成します。

    CREATE OR REPLACE PACKAGE BODY vector_gen_pkg AS
    
      -------------------------------------------
      -------------------------------------------
      ---- V E C T O R    G E N E R A T O R -----
      -------------------------------------------
      -------------------------------------------
      -- Version 1.0                    ---------
      -------------------------------------------
      -------------------------------------------
      ---- DO NOT USE ON PRODUCTION DATABASES ---
      ---- ONLY FOR TESTING AND DEMO PURPOSES ---
      -------------------------------------------
    
    
      FUNCTION get_coordinate(
        input_string CLOB,
        i PLS_INTEGER
      ) RETURN NUMBER IS
        start_pos NUMBER;
        end_pos NUMBER;
        comma_pos NUMBER;
        coord VARCHAR2(100);
        comma_count NUMBER := 0;
        commas NUMBER;
        working_string CLOB;
      BEGIN
        -- Remove leading and trailing brackets
        working_string := input_string;
        working_string := TRIM(BOTH '[]' FROM working_string);
        commas := LENGTH(working_string) - LENGTH(REPLACE(working_string, ',', ''));
    
        -- Initialize positions
        start_pos := 1;
        end_pos := INSTR(working_string, ',', start_pos);
      
        IF i<=0 OR i>commas+1 THEN RETURN NULL;
        END IF;
    
        -- Loop through the string to find the i-th coordinate
        LOOP
          IF comma_count + 1 = i THEN
            IF end_pos = 0 THEN
              -- If there's no more comma, the coordinate is the rest of the string
              coord := SUBSTR(working_string, start_pos);
            ELSE
              coord := SUBSTR(working_string, start_pos, end_pos - start_pos);
            END IF;
            RETURN coord;
          END IF;
    
          -- Move to the next coordinate
          comma_count := comma_count + 1;
          start_pos := end_pos + 1;
          end_pos := INSTR(working_string, ',', start_pos);
    
          -- Exit loop if no more coordinates
          EXIT WHEN start_pos > LENGTH(working_string);
        END LOOP;
    
        -- If the function hasn't returned yet, the index was out of bounds
        RETURN NULL;
    
      END;
    
    
    
      PROCEDURE generate_random_vector(
        dimensions IN PLS_INTEGER,
        min_value IN NUMBER,
        max_value IN NUMBER,
        vec OUT vector
        ) IS
        e CLOB;
      BEGIN
        e := '[';
        FOR i IN 1..dimensions-1 LOOP
          e := e || DBMS_RANDOM.VALUE(min_value, max_value) ||',';
        END LOOP;
        e := e || DBMS_RANDOM.VALUE(min_value, max_value) ||']';
        vec := VECTOR(e);
      END generate_random_vector;
    
    
    
      PROCEDURE generate_clustered_vector(
        centroid IN vector,
        cluster_spread IN NUMBER,
        vec OUT vector
      ) IS
        e CLOB;
        d number;
        BEGIN
          d := VECTOR_DIMENSION_COUNT(centroid);
          e := '[';
          FOR i IN 1 .. d-1 LOOP
            e := e || (get_coordinate(to_clob(centroid),i) + (DBMS_RANDOM.NORMAL * cluster_spread)) ||',';
          END LOOP;
          e := e || (get_coordinate(to_clob(centroid),VECTOR_DIMENSION_COUNT(centroid)) + (DBMS_RANDOM.NORMAL * cluster_spread)) || ']';
          vec := VECTOR(e);
      END generate_clustered_vector;
    
    
    
      FUNCTION normalize_vector(vec IN vector) RETURN vector IS
        e CLOB;
        v CLOB;
        n number;
        d number;
      BEGIN
        n := VECTOR_NORM(vec);
        v := to_clob(vec);
        d := VECTOR_DIMENSION_COUNT(vec);
        e := '[';
        FOR i IN 1 .. d-1 LOOP
          e := e || (get_coordinate(v,i)/n) ||',';
        END LOOP;
        e := e || (get_coordinate(v,d)/n) || ']';
        RETURN VECTOR(e);
      END normalize_vector;
    
    
    
      PROCEDURE generate_vectors(
        num_vectors IN PLS_INTEGER,  -- Must be 1 or above
        dimensions IN PLS_INTEGER,   -- Must be above 1 but less than 500
        num_clusters IN PLS_INTEGER, -- Must be 1 or above
        cluster_spread IN NUMBER,    -- Must be grather than 0
        min_value IN NUMBER,
        max_value IN NUMBER
      ) IS
        centroids t_vectors;
        vectors_per_cluster PLS_INTEGER;
        remaining_vectors PLS_INTEGER;
        vec vector;
        idx PLS_INTEGER := 1;
        max_id NUMBER;
        working_vector VECTOR;
    
      BEGIN
        IF (num_vectors) <=0 OR (num_clusters < 1) OR (num_vectors < num_clusters) OR (dimensions <= 0) OR (dimensions > 500) OR (cluster_spread <= 0) OR (min_value >= max_value) THEN RETURN;
        END IF;
    
        SELECT MAX(id) INTO max_id FROM genvec;
        IF max_id IS NULL THEN max_id := 0;
        END IF;
    
        -- Generate cluster centroids
        FOR i IN 1..num_clusters LOOP
    
          generate_random_vector(dimensions, min_value, max_value, centroids(i));
          working_vector := normalize_vector(centroids(i));
          INSERT INTO genvec VALUES (max_id + idx, centroids(i), 'C'||i, working_vector, DBMS_RANDOM.VALUE(3,600000000));
          idx := idx + 1;
    
        END LOOP;
    
        -- Calculate vectors per cluster
        vectors_per_cluster := TRUNC(num_vectors / num_clusters);
        remaining_vectors := num_vectors MOD num_clusters;
    
        -- Generate vectors for each cluster
        IF vectors_per_cluster > 1 THEN
          FOR i IN 1..num_clusters LOOP
            FOR j IN 1..(vectors_per_cluster - 1) LOOP
              generate_clustered_vector(centroids(i), cluster_spread, vec);
              working_vector := normalize_vector(vec);
              INSERT INTO genvec VALUES (max_id + idx, vec, 'C'||i||'-'||j, working_vector, DBMS_RANDOM.VALUE(3,600000000));
              idx := idx + 1;
            END LOOP;
          END LOOP;
        END IF;
    
        -- Handle remaining vectors: all associated with cluster 1
        IF remaining_vectors > 0 THEN
          FOR j IN 1..remaining_vectors LOOP
            generate_clustered_vector(centroids(1), cluster_spread, vec);
            working_vector := normalize_vector(vec);
            INSERT INTO genvec VALUES (max_id + idx, vec, 'C1-'||idx, working_vector, DBMS_RANDOM.VALUE(3,600000000));
            idx := idx + 1;
          END LOOP;
        END IF;
        COMMIT;
      END generate_vectors;
    
    END vector_gen_pkg;
    /
  3. ベクトル・ジェネレータを設定したら、これと次の手順を実行して、その使用方法を理解できます。

    サンプル値を使用して、vector_gen_pkgパッケージのgenerate_vectorsプロシージャを実行します。

    BEGIN
      vector_gen_pkg.generate_vectors(
        num_vectors => 100,   -- Number of vectors to generate. Must be 1 or above
        dimensions => 3,      -- Number of dimensions of each vector. Must be above 1 but less than 500
        num_clusters => 6,    -- Number of clusters to create. Must be 1 or above
        cluster_spread => 1,  -- Relative closeness of each vector in each cluster (using standard deviation). Must be grather than 0
        min_value => 0,       -- Minimum value for a vector coordinate
        max_value => 100      -- Maximum value for a vector coordinate. Min value must be smaller than max value
      );
    END;
    /
  4. SELECT文を実行して、新しく生成されたベクトルを表示します。
    SELECT name, v FROM genvec;

    出力例:

    NAME    V
    _______ ____________________________________________________
    C1      [6.35792809E+001,5.28954163E+001,5.16500435E+001]
    C2      [5.67991257E+001,5.00640755E+001,2.3642437E+001]
    C3      [2.42510891E+001,5.36970367E+001,6.88145638E+000]
    C4      [8.13146515E+001,2.88190498E+001,4.09245186E+001]
    C5      [6.70646744E+001,2.53395119E+001,6.14522667E+001]
    C6      [5.60192604E+001,8.31662598E+001,2.93592377E+001]
    C1-1    [6.4469986E+001,5.25044632E+001,5.22250557E+001]
    C1-2    [6.31295433E+001,5.21443062E+001,5.02242126E+001]
    C1-3    [6.25154915E+001,5.25730362E+001,5.2617794E+001]
    C1-4    [6.3491375E+001,5.21440697E+001,5.06722069E+001]
    C1-5    [6.21516266E+001,5.32161064E+001,5.23233032E+001]
    C1-6    [6.2913269E+001,5.16970291E+001,5.16683655E+001]
    C1-7    [6.22267456E+001,5.40408363E+001,5.0272541E+001]
    C1-8    [6.1414093E+001,5.28870888E+001,5.27458E+001]
    
    NAME     V
    ________ ____________________________________________________
    C1-9     [6.29652252E+001,5.32767754E+001,5.27030106E+001]
    C1-10    [6.35940704E+001,5.27265244E+001,5.23180656E+001]
    C1-11    [6.34133224E+001,5.39401283E+001,5.29368248E+001]
    C1-12    [6.18856697E+001,5.31113129E+001,5.18861504E+001]
    C1-13    [6.32378883E+001,5.30308647E+001,5.04571724E+001]
    C1-14    [6.18148689E+001,5.33705482E+001,5.29123802E+001]
    C1-15    [6.43224258E+001,5.23124084E+001,5.21299057E+001]
    C2-1     [5.59053535E+001,5.20054626E+001,2.28595486E+001]
    C2-2     [5.71644516E+001,5.13243408E+001,2.31167526E+001]
    C2-3     [5.66626244E+001,5.00615959E+001,2.27138176E+001]
    C2-4     [5.73383865E+001,5.04509125E+001,2.36539135E+001]
    C2-5     [5.6621357E+001,5.01576767E+001,2.38867531E+001]
    C2-6     [5.59768562E+001,5.17590942E+001,2.49088764E+001]
    C2-7     [5.64437904E+001,4.71531525E+001,2.23245487E+001]
    
    NAME     V
    ________ ____________________________________________________
    C2-8     [5.81449051E+001,5.09049644E+001,2.29072056E+001]
    C2-9     [5.37190132E+001,4.87386665E+001,2.28188381E+001]
    C2-10    [5.77416382E+001,4.93461685E+001,2.32014389E+001]
    C2-11    [5.68353958E+001,5.11093979E+001,2.43693123E+001]
    C2-12    [5.79631157E+001,5.0297657E+001,2.28039799E+001]
    C2-13    [5.57930183E+001,5.11965866E+001,2.35887661E+001]
    C2-14    [5.57345848E+001,5.03228951E+001,2.30780907E+001]
    C2-15    [5.69435997E+001,4.8590435E+001,2.58747597E+001]
    C3-1     [2.40239315E+001,5.2352993E+001,5.63517284E+000]
    C3-2     [2.39717846E+001,5.30635986E+001,5.86633539E+000]
    C3-3     [2.70314407E+001,5.48788643E+001,7.96345377E+000]
    C3-4     [2.39875908E+001,5.39634552E+001,5.87654877E+000]
    C3-5     [2.47772026E+001,5.2187336E+001,6.83652115E+000]
    C3-6     [2.32920208E+001,5.41494293E+001,6.40737772E+000]
    
    NAME     V
    ________ ____________________________________________________
    C3-7     [2.46129742E+001,5.32308769E+001,6.29999685E+000]
    C3-8     [2.51000671E+001,5.33271561E+001,8.86797047E+000]
    C3-9     [2.4337059E+001,5.26281281E+001,6.9616766E+000]
    C3-10    [2.39770508E+001,5.42386856E+001,5.63018417E+000]
    C3-11    [2.59837551E+001,5.34013176E+001,6.97773361E+000]
    C3-12    [2.40400314E+001,5.25649719E+001,7.2636981E+000]
    C3-13    [2.13184013E+001,5.28633308E+001,8.3834734E+000]
    C3-14    [2.50075855E+001,5.21548729E+001,6.88196087E+000]
    C3-15    [2.53695087E+001,5.60495186E+001,6.76059389E+000]
    C4-1     [8.28819885E+001,2.95163822E+001,4.03809738E+001]
    C4-2     [8.18269348E+001,2.95735188E+001,3.99435768E+001]
    C4-3     [8.2709259E+001,2.90755043E+001,4.07345886E+001]
    C4-4     [8.18622665E+001,2.88013916E+001,4.1822567E+001]
    C4-5     [7.99165421E+001,2.89941139E+001,4.09653854E+001]
    
    NAME     V
    ________ ____________________________________________________
    C4-6     [8.12936707E+001,2.98655643E+001,4.00380211E+001]
    C4-7     [8.21705704E+001,2.90163479E+001,3.94858704E+001]
    C4-8     [8.20081329E+001,2.89751148E+001,4.1045887E+001]
    C4-9     [8.25486298E+001,2.84143009E+001,4.15654945E+001]
    C4-10    [8.22034149E+001,2.92223415E+001,4.20033302E+001]
    C4-11    [8.2048996E+001,2.98751106E+001,4.09612732E+001]
    C4-12    [8.09316025E+001,2.7799057E+001,4.12611198E+001]
    C4-13    [8.04624023E+001,2.88711109E+001,4.07331085E+001]
    C4-14    [8.13773575E+001,2.97510109E+001,4.09169846E+001]
    C4-15    [8.35310364E+001,2.971031E+001,4.16878052E+001]
    C5-1     [6.87114258E+001,2.53504581E+001,6.11055298E+001]
    C5-2     [6.73569031E+001,2.35163498E+001,6.01617432E+001]
    C5-3     [6.78224869E+001,2.61236534E+001,6.0729248E+001]
    C5-4     [6.76432266E+001,2.56426888E+001,6.35400085E+001]
    
    NAME     V
    ________ ____________________________________________________
    C5-5     [6.75377045E+001,2.60873699E+001,6.35584145E+001]
    C5-6     [6.84944687E+001,2.51576214E+001,6.24934502E+001]
    C5-7     [6.79246216E+001,2.53722992E+001,6.32098122E+001]
    C5-8     [6.84075165E+001,2.63778133E+001,6.10950584E+001]
    C5-9     [6.73214798E+001,2.70551453E+001,6.27835197E+001]
    C5-10    [6.50006485E+001,2.67408028E+001,6.07828026E+001]
    C5-11    [6.68869705E+001,2.3982399E+001,6.13440819E+001]
    C5-12    [6.55524521E+001,2.42231808E+001,6.07235756E+001]
    C5-13    [6.72140808E+001,2.42842178E+001,6.21546478E+001]
    C5-14    [6.89587936E+001,2.67715569E+001,6.08621559E+001]
    C5-15    [6.68405685E+001,2.44039059E+001,6.12652893E+001]
    C6-1     [5.4925251E+001,8.28179474E+001,3.0869236E+001]
    C6-2     [5.52922363E+001,8.23375549E+001,2.94804363E+001]
    C6-3     [5.60466652E+001,8.18454132E+001,2.99774895E+001]
    
    NAME     V
    ________ ____________________________________________________
    C6-4     [5.74460373E+001,8.26830368E+001,2.86887722E+001]
    C6-5     [5.57439041E+001,8.14622726E+001,2.94924259E+001]
    C6-6     [5.4913372E+001,8.48766251E+001,2.92711105E+001]
    C6-7     [5.66876144E+001,8.25907898E+001,2.84199276E+001]
    C6-8     [5.6253479E+001,8.3280838E+001,2.69524212E+001]
    C6-9     [5.50792351E+001,8.37676392E+001,3.08755417E+001]
    C6-10    [5.57719955E+001,8.11036758E+001,2.92569256E+001]
    C6-11    [5.60834808E+001,8.3103096E+001,3.09748001E+001]
    C6-12    [5.58962059E+001,8.3612648E+001,2.95026093E+001]
    C6-13    [5.73348083E+001,8.26950226E+001,2.88242455E+001]
    C6-14    [5.45099411E+001,8.33315659E+001,2.90559101E+001]
    C6-15    [5.57930641E+001,8.5720871E+001,2.92863998E+001]
    C1-97    [6.3716671E+001,5.41518326E+001,5.18371048E+001]
    C1-98    [6.58774261E+001,5.32223206E+001,5.05089798E+001]
    
    NAME      V
    _________ ____________________________________________________
    C1-99     [6.31867676E+001,5.25712204E+001,5.16621819E+001]
    C1-100    [6.20503845E+001,5.15550919E+001,5.08155479E+001]
    
    100 rows selected.

    結果のベクトルを空間に描写するには、次のグラフを考えてみてください:

    図3-1 ベクトル・クラスタ



  5. genvec表で、生成されたベクトルに対して類似検索を実行します。

    最初に、問合せでベクトルの名前を形成するために使用するcluster_numberという変数を定義します。

    DEFINE cluster_number = '&clusterid'

    clusteridの値を入力するよう求められます。この例では、5を使用します。

    Enter value for clusterid: 5

    次の問合せを実行して、生成されたベクトルに対して類似検索を実行します。

    SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v,(SELECT v FROM genvec WHERE name='C'||'&cluster_number'),EUCLIDEAN)
      FETCH EXACT FIRST 20 ROWS ONLY;

    出力例:

    NAME
    ________
    C5
    C5-15
    C5-13
    C5-3
    C5-11
    C5-1
    C5-8
    C5-6
    C5-7
    C5-12
    C5-9
    C5-4
    C5-2
    C5-5
    
    NAME
    ________
    C5-14
    C5-10
    C4-5
    C4-12
    C4-4
    C4-13
    
    20 rows selected.
  6. 次に、今回はコサイン距離メトリックを使用して、生成されたベクトルで類似検索を実行する別の例を示します。
    SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v,(SELECT v FROM genvec WHERE name='C'||'&cluster_number'),COSINE)
      FETCH EXACT FIRST 20 ROWS ONLY;

    出力例:

    NAME
    ________
    C5
    C5-6
    C5-12
    C5-15
    C5-7
    C5-4
    C5-5
    C5-13
    C5-11
    C5-3
    C5-1
    C5-8
    C5-9
    C5-2
    
    NAME
    ________
    C5-14
    C5-10
    C4-5
    C4-4
    C4-10
    C4-12
    
    20 rows selected.
  7. query_vectorという変数を作成し、SELECT INTOを使用して変数にベクトル値を格納します。
    VARIABLE query_vector CLOB
    
    BEGIN
      SELECT v INTO :query_vector 
      FROM genvec 
      WHERE name='C'||'&cluster_number';
    END;
    /
    
    PRINT query_vector;

    出力例:

    QUERY_VECTOR
    --------------------------------------------------------------------
    [6.70646744E+001,2.53395119E+001,6.14522667E+001]
    
  8. 前のステップで作成した問合せベクトルを使用して、類似検索の実行計画を作成します。
    EXPLAIN PLAN FOR
      SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN)
      FETCH EXACT FIRST 20 ROWS ONLY;
    
    SELECT plan_table_output
      FROM table(dbms_xplan.display('plan_table',null,'all'));
    出力例:
    PLAN_TABLE_OUTPUT
    _____________________________________________________________________________________
    Plan hash value: 1549136425
    
    ----------------------------------------------------------------------------------
    | Id  | Operation               | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
    ----------------------------------------------------------------------------------
    |   0 | SELECT STATEMENT        |        |    20 |  5040 |     4  (25)| 00:00:01 |
    |*  1 |  COUNT STOPKEY          |        |       |       |            |          |
    |   2 |   VIEW                  |        |   100 | 25200 |     4  (25)| 00:00:01 |
    |*  3 |    SORT ORDER BY STOPKEY|        |   100 | 25800 |     4  (25)| 00:00:01 |
    |   4 |     TABLE ACCESS FULL   | GENVEC |   100 | 25800 |     3   (0)| 00:00:01 |
    ----------------------------------------------------------------------------------
    
    Query Block Name / Object Alias (identified by operation id):
    -------------------------------------------------------------
    
    PLAN_TABLE_OUTPUT
    ______________________________________________________________
    
       1 - SEL$2
       2 - SEL$1 / "from$_subquery$_002"@"SEL$2"
       3 - SEL$1
       4 - SEL$1 / "GENVEC"@"SEL$1"
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       1 - filter(ROWNUM<=20)
       3 - filter(ROWNUM<=20)
    
    Column Projection Information (identified by operation id):
    -----------------------------------------------------------
    
    PLAN_TABLE_OUTPUT
    __________________________________________________________________________
    
       1 - "from$_subquery$_002"."NAME"[VARCHAR2,500]
       2 - "from$_subquery$_002"."NAME"[VARCHAR2,500]
       3 - (#keys=1) VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ ,
           VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ),
           EUCLIDEAN)[BINARY_DOUBLE,8], "NAME"[VARCHAR2,500]
       4 - "NAME"[VARCHAR2,500], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ ,
           VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ),
           EUCLIDEAN)[BINARY_DOUBLE,8]
    
    Note
    -----
       - dynamic statistics used: dynamic sampling (level=2)
    
    41 rows selected.
  9. Hierarchical Navigable Small World (HNSW)索引を作成します。
    CREATE VECTOR INDEX genvec_hnsw_idx ON genvec(v)
      ORGANIZATION INMEMORY NEIGHBOR GRAPH
      DISTANCE EUCLIDEAN
      WITH TARGET ACCURACY 95;
    
    SELECT INDEX_NAME, INDEX_TYPE, INDEX_SUBTYPE FROM USER_INDEXES;

    出力例:

    INDEX_NAME                  INDEX_TYPE    INDEX_SUBTYPE
    ___________________________ _____________ _______________________________
    DM$CEDOC_MODEL              NORMAL
    SYS_IL0000073592C00002$$    LOB
    GENVEC_HNSW_IDX             VECTOR        INMEMORY_NEIGHBOR_GRAPH_HNSW
    SYS_C008694                 NORMAL
    
    4 rows selected.
  10. VECSYS.VECTOR$INDEXビューからHNSW索引に関する情報を問い合せます。
    SELECT JSON_SERIALIZE(IDX_PARAMS RETURNING VARCHAR2 PRETTY)
      FROM VECSYS.VECTOR$INDEX 
      WHERE IDX_NAME = 'GENVEC_HNSW_IDX';

    出力例:

    JSON_SERIALIZE(IDX_PARAMSRETURNINGVARCHAR2PRETTY)
    ______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________
    {
      "type" : "HNSW",
      "num_neighbors" : 32,
      "efConstruction" : 300,
      "distance" : "EUCLIDEAN",
      "accuracy" : 95,
      "vector_type" : "FLOAT32",
      "vector_dimension" : 3,
      "degree_of_parallelism" : 1,
      "pdb_id" : 3,
      "indexed_col" : "V"
    }

    VECSYS.VECTOR$INDEXビューの列の詳細は、VECSYS.VECTOR$INDEXを参照してください。

  11. HNSW索引を作成したら、genvec表で類似検索用の別の実行計画を作成します。
    EXPLAIN PLAN FOR
      SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN)
      FETCH APPROX FIRST 20 rows only;
    
    SELECT plan_table_output 
    FROM table(dbms_xplan.display('plan_table',null,'all'));

    出力例:

    PLAN_TABLE_OUTPUT
    _____________________________________________________________________________________________________________
    Plan hash value: 1202819565
    
    ----------------------------------------------------------------------------------------------------------
    | Id  | Operation                      | Name            | Rows  | Bytes |TempSpc| Cost (%CPU)| Time     |
    ----------------------------------------------------------------------------------------------------------
    |   0 | SELECT STATEMENT               |                 |    20 |  5040 |       |   165   (2)| 00:00:01 |
    |*  1 |  COUNT STOPKEY                 |                 |       |       |       |            |          |
    |   2 |   VIEW                         |                 |   100 | 25200 |       |   165   (2)| 00:00:01 |
    |*  3 |    SORT ORDER BY STOPKEY       |                 |   100 |   425K|   808K|   165   (2)| 00:00:01 |
    |   4 |     TABLE ACCESS BY INDEX ROWID| GENVEC          |   100 |   425K|       |     1   (0)| 00:00:01 |
    |   5 |      VECTOR INDEX HNSW SCAN    | GENVEC_HNSW_IDX |   100 |   425K|       |     1   (0)| 00:00:01 |
    ----------------------------------------------------------------------------------------------------------
    
    Query Block Name / Object Alias (identified by operation id):
    
    PLAN_TABLE_OUTPUT
    ________________________________________________________________
    -------------------------------------------------------------
    
       1 - SEL$2
       2 - SEL$1 / "from$_subquery$_002"@"SEL$2"
       3 - SEL$1
       4 - SEL$1 / "GENVEC"@"SEL$1"
       5 - SEL$1 / "GENVEC"@"SEL$1"
    
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       1 - filter(ROWNUM<=20)
       3 - filter(ROWNUM<=20)
    
    
    PLAN_TABLE_OUTPUT
    _____________________________________________________________________________________________________
    Column Projection Information (identified by operation id):
    -----------------------------------------------------------
    
       1 - "from$_subquery$_002"."NAME"[VARCHAR2,500]
       2 - "from$_subquery$_002"."NAME"[VARCHAR2,500]
       3 - (#keys=1) VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+
           USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8], "NAME"[VARCHAR2,500]
       4 - "NAME"[VARCHAR2,500], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, *
           /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8]
       5 - "GENVEC".ROWID[ROWID,10], VECTOR_DISTANCE("V" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR,
           *, * /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[8]
    
    Note
    -----
    
    PLAN_TABLE_OUTPUT
    ___________________________________________________________
       - dynamic statistics used: dynamic sampling (level=2)
    
    43 rows selected.
    

    ご覧のとおり、実行計画にHNSW索引に関する情報が含まれるようになりました。

  12. genvec表のベクトルの類似検索を再度実行します。問合せ結果は、使用される索引付け方法に基づいて異なる場合があります。このシナリオに含まれる結果は、単なる例です。
    SELECT name
      FROM genvec
      ORDER BY vector_distance(v, :query_vector, EUCLIDEAN)
      FETCH APPROX FIRST 20 ROWS ONLY;

    出力例:

    NAME
    ________
    C5
    C5-15
    C5-13
    C5-3
    C5-11
    C5-1
    C5-8
    C5-6
    C5-7
    C5-12
    C5-9
    C5-4
    C5-2
    C5-5
    
    NAME
    ________
    C5-14
    C5-10
    C4-5
    C4-12
    C4-4
    C4-13
    
    20 rows selected.
  13. HNSW索引を削除し、Inverted File Flat (IVF)ベクトル索引を作成します。
    DROP INDEX genvec_hnsw_idx;
    
    CREATE VECTOR INDEX genvec_ivf_idx ON genvec(v) 
      ORGANIZATION NEIGHBOR PARTITIONS
      DISTANCE EUCLIDEAN
      WITH TARGET ACCURACY 95;
    
    SELECT JSON_SERIALIZE(IDX_PARAMS RETURNING VARCHAR2 PRETTY)
      FROM VECSYS.VECTOR$INDEX WHERE IDX_NAME = 'GENVEC_IVF_IDX';

    出力例:

    JSON_SERIALIZE(IDX_PARAMSRETURNINGVARCHAR2PRETTY)
    ________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________
    {
      "target_centroids" : 40,
      "pdb_id" : 3,
      "vector_type" : "FLOAT32",
      "type" : "IVF_FLAT",
      "vector_dimension" : 3,
      "distance" : "EUCLIDEAN",
      "indexed_col" : "V",
      "min_vectors_per_partition" : 10,
      "degree_of_parallelism" : 1,
      "accuracy" : 95,
      "num_centroids" : 24,
      "samples_per_partition" : 256
    }
  14. 再度、新しく作成されたIVF索引に関する情報が含まれる実行計画を作成します。
    EXPLAIN PLAN FOR
      SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN)
      FETCH APPROX FIRST 20 ROWS ONLY;
    
    SELECT plan_table_output 
      FROM table(dbms_xplan.display('plan_table',null,'all'));

    出力例:

    PLAN_TABLE_OUTPUT
    __________________________________________________________________________________________________________________________________________________________________________
    Plan hash value: 2965029064
    
    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    | Id  | Operation                          | Name                                                             | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |
    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    |   0 | SELECT STATEMENT                   |                                                                  |     5 |  1260 |    29  (14)| 00:00:01 |       |       |
    |   1 |  VIEW                              |                                                                  |     5 |  1260 |    29  (14)| 00:00:01 |       |       |
    |   2 |   SORT ORDER BY                    |                                                                  |     5 | 21910 |    29  (14)| 00:00:01 |       |       |
    |*  3 |    HASH JOIN                       |                                                                  |     5 | 21910 |    28  (11)| 00:00:01 |       |       |
    |   4 |     VIEW                           | VW_IVPSR_11E7D7DE                                                |    20 |   320 |    24   (9)| 00:00:01 |       |       |
    |*  5 |      COUNT STOPKEY                 |                                                                  |       |       |            |          |       |       |
    |   6 |       VIEW                         | VW_IVPSJ_578B79F1                                                |    25 |   450 |    24   (9)| 00:00:01 |       |       |
    |*  7 |        SORT ORDER BY STOPKEY       |                                                                  |    25 |   550 |    24   (9)| 00:00:01 |       |       |
    |*  8 |         HASH JOIN                  |                                                                  |    25 |   550 |    23   (5)| 00:00:01 |       |       |
    
    PLAN_TABLE_OUTPUT
    __________________________________________________________________________________________________________________________________________________________________________
    |   9 |          PART JOIN FILTER CREATE   | :BF0000                                                          |     6 |    18 |     4  (25)| 00:00:01 |       |       |
    |  10 |           VIEW                     | VW_IVCR_B5B87E67                                                 |     6 |    18 |     4  (25)| 00:00:01 |       |       |
    |* 11 |            COUNT STOPKEY           |                                                                  |       |       |            |          |       |       |
    |  12 |             VIEW                   | VW_IVCN_9A1D2119                                                 |    24 |   312 |     4  (25)| 00:00:01 |       |       |
    |* 13 |              SORT ORDER BY STOPKEY |                                                                  |    24 |   216 |     4  (25)| 00:00:01 |       |       |
    |  14 |               TABLE ACCESS FULL    | VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS           |    24 |   216 |     3   (0)| 00:00:01 |       |       |
    |  15 |          PARTITION LIST JOIN-FILTER|                                                                  |   100 |  1900 |     3   (0)| 00:00:01 |:BF0000|:BF0000|
    |  16 |           TABLE ACCESS FULL        | VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROID_PARTITIONS |   100 |  1900 |     3   (0)| 00:00:01 |:BF0000|:BF0000|
    |  17 |     TABLE ACCESS FULL              | GENVEC                                                           |   100 |   426K|     3   (0)| 00:00:01 |       |       |
    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    
    Query Block Name / Object Alias (identified by operation id):
    -------------------------------------------------------------
    
    
    PLAN_TABLE_OUTPUT
    ___________________________________________________________
       1 - SEL$94C0F189 / "from$_subquery$_002"@"SEL$2"
       2 - SEL$94C0F189
       4 - SEL$E731354C / "VW_IVPSR_11E7D7DE"@"SEL$1"
       5 - SEL$E731354C
       6 - SEL$0C00A749 / "VW_IVPSJ_578B79F1"@"SEL$E731354C"
       7 - SEL$0C00A749
      10 - SEL$700CE8F1 / "VW_IVCR_B5B87E67"@"SEL$0C00A749"
      11 - SEL$700CE8F1
      12 - SEL$E5326247 / "VW_IVCN_9A1D2119"@"SEL$700CE8F1"
      13 - SEL$E5326247
      14 - SEL$E5326247 / "VTIX_CENTRD"@"SEL$E5326247"
      16 - SEL$0C00A749 / "VTIX_CNPART"@"SEL$0C00A749"
      17 - SEL$94C0F189 / "GENVEC"@"SEL$1"
    
    
    PLAN_TABLE_OUTPUT
    ______________________________________________________________________________
    Predicate Information (identified by operation id):
    ---------------------------------------------------
    
       3 - access("GENVEC".ROWID="VW_IVPSR_11E7D7DE"."BASE_TABLE_ROWID")
       5 - filter(ROWNUM<=20)
       7 - filter(ROWNUM<=20)
       8 - access("VW_IVCR_B5B87E67"."CENTROID_ID"="VTIX_CNPART"."CENTROID_ID")
      11 - filter(ROWNUM<=6)
      13 - filter(ROWNUM<=6)
    
    Column Projection Information (identified by operation id):
    -----------------------------------------------------------
    
       1 - "from$_subquery$_002"."NAME"[VARCHAR2,500]
    
    PLAN_TABLE_OUTPUT
    __________________________________________________________________________________________________________________________________________________________________
       2 - (#keys=1) "VEC_DIST"[BINARY_DOUBLE,8], "GENVEC"."NAME"[VARCHAR2,500]
       3 - (#keys=1) "GENVEC"."NAME"[VARCHAR2,500], "VEC_DIST"[BINARY_DOUBLE,8], "GENVEC"."NAME"[VARCHAR2,500]
       4 - "BASE_TABLE_ROWID"[ROWID,10], "VEC_DIST"[BINARY_DOUBLE,8]
       5 - "VW_IVPSJ_578B79F1"."BASE_TABLE_ROWID"[ROWID,10], "VW_IVPSJ_578B79F1"."VEC_DIST"[BINARY_DOUBLE,8]
       6 - "VW_IVPSJ_578B79F1"."BASE_TABLE_ROWID"[ROWID,10], "VW_IVPSJ_578B79F1"."VEC_DIST"[BINARY_DOUBLE,8]
       7 - (#keys=1) VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ),
           EUCLIDEAN)[BINARY_DOUBLE,8], "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10]
       8 - (#keys=1) "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, * /*+
           USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8]
       9 - "VW_IVCR_B5B87E67"."CENTROID_ID"[NUMBER,22], "VW_IVCR_B5B87E67"."CENTROID_ID"[NUMBER,22]
      10 - "CENTROID_ID"[NUMBER,22]
      11 - "VW_IVCN_9A1D2119"."CENTROID_ID"[NUMBER,22]
      12 - "VW_IVCN_9A1D2119"."CENTROID_ID"[NUMBER,22]
      13 - (#keys=1) VECTOR_DISTANCE("VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS"."CENTROID_VECTOR" /*+ LOB_BY_VALUE */ , VECTOR(:QUERY_VECTOR, *, *
    
    PLAN_TABLE_OUTPUT
    _________________________________________________________________________________________________________________________________________________________________
           /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8], "VTIX_CENTRD"."CENTROID_ID"[NUMBER,22]
      14 - "VTIX_CENTRD"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VECTOR$GENVEC_IVF_IDX$87355_87370_0$IVF_FLAT_CENTROIDS"."CENTROID_VECTOR" /*+ LOB_BY_VALUE */
           , VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8]
      15 - "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], "VTIX_CNPART"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ ,
           VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8]
      16 - "VTIX_CNPART"."BASE_TABLE_ROWID"[ROWID,10], "VTIX_CNPART"."CENTROID_ID"[NUMBER,22], VECTOR_DISTANCE("VTIX_CNPART"."DATA_VECTOR" /*+ LOB_BY_VALUE */ ,
           VECTOR(:QUERY_VECTOR, *, * /*+  USEBLOBPCW_QVCGMD */ ), EUCLIDEAN)[BINARY_DOUBLE,8]
      17 - "GENVEC".ROWID[ROWID,10], "GENVEC"."NAME"[VARCHAR2,500]
    
    Note
    -----
       - dynamic statistics used: dynamic sampling (level=2)
       - this is an adaptive plan
    
    83 rows selected.
  15. 最後に、類似検索を再度実行します。
    SELECT name
      FROM genvec
      ORDER BY VECTOR_DISTANCE(v, :query_vector, EUCLIDEAN)
      FETCH APPROX FIRST 20 ROWS ONLY;

    出力例:

    NAME
    ________
    C5
    C5-15
    C5-13
    C5-3
    C5-11
    C5-1
    C5-8
    C5-6
    C5-7
    C5-12
    C5-9
    C5-4
    C5-2
    C5-5
    
    NAME
    ________
    C5-14
    C5-10
    C4-5
    C4-12
    C4-4
    C4-13
    
    20 rows selected.