BINARYベクトル・ジェネレータの使用によるSQLクイック・スタート
一連のプロシージャを使用してBINARY
ベクトルを生成し、ベクトル埋込みモデルを使用せずにOracle AI Vector Searchを簡単に開始できます。
記載されているプロシージャを使用すると、指定した数の次元およびクラスタを持つバイナリ・ベクトルをランダムに生成できます。生成プロセスの出力は、
genbvec
という表への移入で、これを使用して類似検索などを試すことができます。
次の手順では、十分な権限(最小でDB_DEVELOPER_ROLE
ロール)を持つデータベース・アカウントへのアクセス権がすでにあることを前提としています。
ノート:
本番データベースでBINARY
ベクトル・ジェネレータを使用しないでください。このチュートリアルは、テストおよびデモの目的でのみ使用してください。
ノート:
サードパーティのBINARY
ベクトル埋込みモデルにすでにアクセスできる場合は、ベクトル・ユーティリティのPL/SQLパッケージDBMS_VECTOR
を使用してサードパーティのREST APIをコールすることで、実際のテキストからBINARY
埋込みへの変換を実行できます。詳細は、「Oracle Databaseの外部でのテキスト文字列からBINARY埋込みへの変換」の例を参照してください。
genbvec
表を作成します。DROP TABLE genbvec PURGE; CREATE TABLE genbvec ( id NUMBER, -- id of the generated vector v VECTOR(*, BINARY), -- generated vector name VARCHAR2(500), -- name for the generated vector: C1 to Cn are centroids, Cx_y is vector number y in cluster number x bv VARCHAR2(40), -- bit version of the generated vector ly NUMBER -- random number you can use to filter out rows in addiion to similarity search on vectors );
BINARY
ベクトルの生成に使用するプロシージャを作成します:CREATE OR REPLACE PROCEDURE generate_random_binary_vector( dimensions IN NUMBER, result_int OUT VECTOR, result_binary OUT VARCHAR2 ) IS binary_vector VARCHAR2(40); int8_value NUMBER; number_of_bits NUMBER; char_vector VARCHAR2(40); BEGIN -- Validate dimension is a multiple of 8 IF MOD(dimensions, 8) != 0 THEN RAISE_APPLICATION_ERROR(-20001, 'Number of dimensions must be a multiple of 8'); END IF; -- Generate the random binary vector binary_vector := ''; FOR i IN 1 .. dimensions LOOP IF DBMS_RANDOM.VALUE(0, 1) < 0.5 THEN binary_vector := binary_vector || '0'; ELSE binary_vector := binary_vector || '1'; END IF; END LOOP; -- Convert 8-bit packets to their int8 values and build the result string number_of_bits := dimensions/8; char_vector := '['; FOR i IN 0 .. number_of_bits - 1 LOOP int8_value := 0; FOR j IN 0 .. 7 LOOP int8_value := int8_value + TO_NUMBER(SUBSTR(binary_vector, i*8+j+1, 1)) * POWER(2, j); END LOOP; char_vector := char_vector || int8_value; IF i < number_of_bits - 1 THEN char_vector := char_vector || ','; END IF; END LOOP; char_vector := char_vector || ']'; -- Return the generated vector value result_int := to_vector(char_vector, dimensions, BINARY); result_binary := binary_vector; END generate_random_binary_vector; /
CREATE OR REPLACE PROCEDURE generate_binary_cluster( centroid IN VARCHAR2, -- a string of 1 and 0 spread IN NUMBER, -- Maximum Hamming distance between centroid and other vectors in the same cluster cluster_size IN NUMBER, -- Number of vectors to generate in addition to the centroid result_binary OUT SYS_REFCURSOR, result_int8 OUT SYS_REFCURSOR ) IS dimension NUMBER; max_spread NUMBER; vector VARCHAR2(40); char_vector VARCHAR2(40); flip_positions DBMS_SQL.VARCHAR2_TABLE; random_position NUMBER; tresult_binary DBMS_SQL.VARCHAR2_TABLE; tresult_int8 DBMS_SQL.VARCHAR2_TABLE; binary_vector VARCHAR2(40); cluster_index NUMBER := 1; number_of_bits NUMBER; int8_value NUMBER; BEGIN -- Determine the dimension of the centroid vector dimension := LENGTH(centroid); -- Ensure dimension is a multiple of 8 IF MOD(dimension, 8) != 0 THEN RAISE_APPLICATION_ERROR(-20001, 'Number of dimensions must be a multiple of 8'); END IF; -- Generate the cluster of binary vectors WHILE cluster_index <= cluster_size LOOP binary_vector := centroid; -- Randomly flip bits in the centroid vector with a max of spread bits max_spread := TRUNC(DBMS_RANDOM.VALUE(1, spread+1)); flip_positions.DELETE; FOR i IN 1 .. max_spread LOOP random_position := TRUNC(DBMS_RANDOM.VALUE(1, dimension+1)); -- Ensure no duplicates WHILE flip_positions.EXISTS(random_position) LOOP random_position := TRUNC(DBMS_RANDOM.VALUE(1, dimension+1)); END LOOP; flip_positions(random_position) := '1'; END LOOP; -- Apply flips to binary vector FOR i IN 1 .. dimension LOOP IF flip_positions.EXISTS(i) THEN IF SUBSTR(binary_vector, i, 1) = '0' THEN binary_vector := SUBSTR(binary_vector, 1, i-1) || '1' || SUBSTR(binary_vector, i+1); ELSE binary_vector := SUBSTR(binary_vector, 1, i-1) || '0' || SUBSTR(binary_vector, i+1); END IF; END IF; END LOOP; -- Convert binary vector to int8 values number_of_bits := dimension/8; char_vector := '['; FOR i IN 0 .. number_of_bits-1 LOOP int8_value := 0; FOR j IN 0 .. 7 LOOP int8_value := int8_value + TO_NUMBER(SUBSTR(binary_vector, i*8+j+1, 1)) * POWER(2, j); END LOOP; char_vector := char_vector || int8_value; IF i < number_of_bits-1 THEN char_vector := char_vector || ','; END IF; END LOOP; char_vector := char_vector || ']'; -- Add generated vectors to result tables tresult_binary(cluster_index) := binary_vector; tresult_int8(cluster_index) := char_vector; cluster_index := cluster_index + 1; END LOOP; -- Open cursor for binary result set OPEN result_binary FOR SELECT COLUMN_VALUE AS binary_vector FROM TABLE(tresult_binary); -- Open cursor for int8 result set OPEN result_int8 FOR SELECT COLUMN_VALUE AS int8_vector FROM TABLE(tresult_int8); END generate_binary_cluster; /
CREATE OR REPLACE PROCEDURE generate_binary_vectors( num_vectors NUMBER, -- If numbers of vector is not a multiple of num_clusters, remaining vectors are not generated num_clusters NUMBER, -- Must be greater than 0 dimensions NUMBER, -- Must be a multiple of 8 cluster_spread NUMBER -- Maximum Hamming distance between centroid and other vectors in the same cluster: max number of bits flipped ) IS vectors_per_cluster NUMBER; remaining_vectors NUMBER; i NUMBER := 1; j NUMBER := 1; idx NUMBER := 1; max_id NUMBER; ri VECTOR(*, BINARY); rb VARCHAR2(40); result_binary SYS_REFCURSOR; result_int8 SYS_REFCURSOR; vb VARCHAR2(40); vi VARCHAR2(40); BEGIN IF (num_vectors) <=0 OR (num_clusters < 1) OR (num_vectors < num_clusters) OR (dimensions <= 0) OR (dimensions > 504) OR (cluster_spread <= 0) THEN RAISE_APPLICATION_ERROR(-20001, 'Issues with arguments provided'); END IF; SELECT MAX(id) INTO max_id FROM genbvec; IF max_id IS NULL THEN max_id := 0; END IF; -- Calculate vectors per cluster vectors_per_cluster := TRUNC(num_vectors / num_clusters); remaining_vectors := num_vectors MOD num_clusters; -- remaining vectors are not generated -- Generate cluster centroids FOR i IN 1..num_clusters LOOP generate_random_binary_vector(dimensions, ri, rb); INSERT INTO genbvec VALUES (max_id + idx, ri, 'C'||i, rb, DBMS_RANDOM.VALUE(3,600000000)); idx := idx + 1; -- Generate vectors for each cluster IF vectors_per_cluster > 1 THEN generate_binary_cluster(rb, cluster_spread, vectors_per_cluster, result_binary, result_int8); -- Output the binary result j:= 1; LOOP FETCH result_binary INTO vb; FETCH result_int8 INTO vi; EXIT WHEN result_binary%NOTFOUND; ri := TO_VECTOR(vi, dimensions, BINARY); INSERT INTO genbvec VALUES (max_id + idx, ri, 'C'||i||'-'||j, vb, DBMS_RANDOM.VALUE(3,600000000)); j := j+1; idx := idx + 1; END LOOP; CLOSE result_binary; CLOSE result_int8; END IF; END LOOP; COMMIT; END generate_binary_vectors; /
- ベクトル・ジェネレータ・プロシージャを設定したら、このステップのコマンドを実行して、データベース内で
BINARY
ベクトルの試験使用を開始できます。この例では、2つのクラスタを生成します。それぞれが32次元のベクトル(重心を含む)を21個持ち、重心からの最大の分散は3です:
generate_binary_vectors
プロシージャを使用して、いくつかのBINARY
ベクトルの生成を開始します。生成の結果が、表genbvec
に挿入されます。BEGIN generate_binary_vectors( num_vectors => 40, -- If numbers of vector is not a multiple of num_clusters, remaining vectors are not generated num_clusters => 2, -- Must be grather than 0 dimensions => 32, -- Must be a multiple of 8 and less than 504 cluster_spread => 3 -- Maximum Hamming distance between centroid and other vectors in the same cluster: max number of bits flipped ); END; /
SELECT
文を実行して、生成されたBINARY
ベクトルを表示します。SET SERVEROUTPUT ON; SELECT id, v, name, VECTOR_DIMENSION_COUNT(v) DIMS, VECTOR_DIMENSION_FORMAT(v) FORMAT, bv, ly FROM genbvec;
出力例:
ID V NAME DIMS FORMAT BV LY ------- ------------------- --------- -------- ---------- ----------------------------------- -------------- 1 [24,153,161,63] C1 32 BINARY 00011000100110011000010111111100 99789021.1 2 [24,153,165,63] C1-1 32 BINARY 00011000100110011010010111111100 60221003.5 3 [26,152,165,63] C1-2 32 BINARY 01011000000110011010010111111100 387124796 4 [24,201,161,62] C1-3 32 BINARY 00011000100100111000010101111100 291263868 5 [24,187,161,63] C1-4 32 BINARY 00011000110111011000010111111100 583827824 6 [24,153,161,61] C1-5 32 BINARY 00011000100110011000010110111100 144826451 7 [24,153,171,55] C1-6 32 BINARY 00011000100110011101010111101100 113684378 8 [88,153,161,61] C1-7 32 BINARY 00011010100110011000010110111100 312081799 9 [152,217,161,47] C1-8 32 BINARY 00011001100110111000010111110100 173971628 10 [24,153,163,59] C1-9 32 BINARY 00011000100110011100010111011100 500775192 11 [24,153,160,61] C1-10 32 BINARY 00011000100110010000010110111100 137309652 12 [25,185,161,47] C1-11 32 BINARY 10011000100111011000010111110100 483392712 13 [89,153,161,63] C1-12 32 BINARY 10011010100110011000010111111100 458730494 14 [24,153,229,31] C1-13 32 BINARY 00011000100110011010011111111000 325738354 ID V NAME DIMS FORMAT BV LY ------- ------------------- --------- -------- ---------- ----------------------------------- -------------- 15 [24,152,161,63] C1-14 32 BINARY 00011000000110011000010111111100 260267242 16 [24,153,165,63] C1-15 32 BINARY 00011000100110011010010111111100 153663322 17 [24,137,169,63] C1-16 32 BINARY 00011000100100011001010111111100 411918780 18 [24,185,161,63] C1-17 32 BINARY 00011000100111011000010111111100 53885587.1 19 [152,137,161,63] C1-18 32 BINARY 00011001100100011000010111111100 321305137 20 [25,153,161,63] C1-19 32 BINARY 10011000100110011000010111111100 180742593 21 [16,153,161,63] C1-20 32 BINARY 00001000100110011000010111111100 511768659 22 [183,107,24,190] C2 32 BINARY 11101101110101100001100001111101 529205377 23 [181,251,24,190] C2-1 32 BINARY 10101101110111110001100001111101 391560729 24 [191,107,25,186] C2-2 32 BINARY 11111101110101101001100001011101 191852938 25 [182,106,24,190] C2-3 32 BINARY 01101101010101100001100001111101 164088550 26 [183,107,56,187] C2-4 32 BINARY 11101101110101100001110011011101 20400437.6 27 [183,106,16,190] C2-5 32 BINARY 11101101010101100000100001111101 363725396 28 [183,107,40,190] C2-6 32 BINARY 11101101110101100001010001111101 144549103 ID V NAME DIMS FORMAT BV LY ------- ------------------- --------- -------- ---------- ----------------------------------- -------------- 29 [183,107,26,190] C2-7 32 BINARY 11101101110101100101100001111101 318036129 30 [183,123,24,188] C2-8 32 BINARY 11101101110111100001100000111101 309460286 31 [179,35,24,190] C2-9 32 BINARY 11001101110001000001100001111101 25042254.7 32 [182,251,24,190] C2-10 32 BINARY 01101101110111110001100001111101 355499793 33 [183,251,24,190] C2-11 32 BINARY 11101101110111110001100001111101 483002129 34 [183,107,24,254] C2-12 32 BINARY 11101101110101100001100001111111 497697267 35 [183,42,24,158] C2-13 32 BINARY 11101101010101000001100001111001 64446273.5 36 [151,107,28,186] C2-14 32 BINARY 11101001110101100011100001011101 248483969 37 [167,43,16,190] C2-15 32 BINARY 11100101110101000000100001111101 513880134 38 [183,106,24,190] C2-16 32 BINARY 11101101010101100001100001111101 558247180 39 [183,123,24,190] C2-17 32 BINARY 11101101110111100001100001111101 287706546 40 [151,107,24,190] C2-18 32 BINARY 11101001110101100001100001111101 309138884 41 [167,107,28,186] C2-19 32 BINARY 11100101110101100011100001011101 433932877 42 [63,106,24,190] C2-20 32 BINARY 11111100010101100001100001111101 84539416.7
BINARY
ベクトルの類似検索を実行します。SELECT name, v, bv, VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1'), HAMMING) DISTANCE FROM genbvec ORDER BY VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1'), HAMMING);
出力例:
NAME V BV DISTANCE ------- ----------------- --------------------------------- ---------- C1 [24,153,161,63] 00011000100110011000010111111100 0 C1-1 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-20 [16,153,161,63] 00001000100110011000010111111100 1.0 C1-19 [25,153,161,63] 10011000100110011000010111111100 1.0 C1-17 [24,185,161,63] 00011000100111011000010111111100 1.0 C1-15 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-14 [24,152,161,63] 00011000000110011000010111111100 1.0 C1-5 [24,153,161,61] 00011000100110011000010110111100 1.0 C1-4 [24,187,161,63] 00011000110111011000010111111100 2.0 C1-18 [152,137,161,63] 00011001100100011000010111111100 2.0 C1-16 [24,137,169,63] 00011000100100011001010111111100 2.0 C1-12 [89,153,161,63] 10011010100110011000010111111100 2.0 C1-10 [24,153,160,61] 00011000100110010000010110111100 2.0 C1-9 [24,153,163,59] 00011000100110011100010111011100 2.0 NAME V BV DISTANCE ------- ----------------- --------------------------------- ---------- C1-7 [88,153,161,61] 00011010100110011000010110111100 2.0 C1-2 [26,152,165,63] 01011000000110011010010111111100 3.0 C1-3 [24,201,161,62] 00011000100100111000010101111100 3.0 C1-6 [24,153,171,55] 00011000100110011101010111101100 3.0 C1-8 [152,217,161,47] 00011001100110111000010111110100 3.0 C1-11 [25,185,161,47] 10011000100111011000010111110100 3.0 C1-13 [24,153,229,31] 00011000100110011010011111111000 3.0 C2-1 [181,251,24,190] 10101101110111110001100001111101 15.0 C2-10 [182,251,24,190] 01101101110111110001100001111101 15.0 C2-6 [183,107,40,190] 11101101110101100001010001111101 16.0 C2-11 [183,251,24,190] 11101101110111110001100001111101 16.0 C2-2 [191,107,25,186] 11111101110101101001100001011101 17.0 C2-20 [63,106,24,190] 11111100010101100001100001111101 17.0 C2-18 [151,107,24,190] 11101001110101100001100001111101 17.0 NAME V BV DISTANCE ------- ----------------- --------------------------------- ---------- C2-17 [183,123,24,190] 11101101110111100001100001111101 17.0 C2-15 [167,43,16,190] 11100101110101000000100001111101 17.0 C2-9 [179,35,24,190] 11001101110001000001100001111101 17.0 C2-4 [183,107,56,187] 11101101110101100001110011011101 17.0 C2 [183,107,24,190] 11101101110101100001100001111101 18.0 C2-8 [183,123,24,188] 11101101110111100001100000111101 18.0 C2-3 [182,106,24,190] 01101101010101100001100001111101 18.0 C2-5 [183,106,16,190] 11101101010101100000100001111101 18.0 C2-7 [183,107,26,190] 11101101110101100101100001111101 19.0 C2-12 [183,107,24,254] 11101101110101100001100001111111 19.0 C2-13 [183,42,24,158] 11101101010101000001100001111001 19.0 C2-14 [151,107,28,186] 11101001110101100011100001011101 19.0 C2-16 [183,106,24,190] 11101101010101100001100001111101 19.0 C2-19 [167,107,28,186] 11100101110101100011100001011101 21.0 42 rows selected.
- 別の類似検索を実行します。今回は結果を最初の21行に制限します。この例では、これは結果にクラスタ1の
BINARY
ベクトルのみが含まれることを意味します。SELECT name, v, bv, VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1'), HAMMING) DISTANCE FROM genbvec ORDER BY VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1'), HAMMING) FETCH EXACT FIRST 21 ROWS ONLY;
出力例:
NAME V BV DISTANCE ------- ------------------ --------------------------------- ---------- C1 [24,153,161,63] 00011000100110011000010111111100 0 C1-1 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-20 [16,153,161,63] 00001000100110011000010111111100 1.0 C1-19 [25,153,161,63] 10011000100110011000010111111100 1.0 C1-17 [24,185,161,63] 00011000100111011000010111111100 1.0 C1-15 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-14 [24,152,161,63] 00011000000110011000010111111100 1.0 C1-5 [24,153,161,61] 00011000100110011000010110111100 1.0 C1-4 [24,187,161,63] 00011000110111011000010111111100 2.0 C1-18 [152,137,161,63] 00011001100100011000010111111100 2.0 C1-16 [24,137,169,63] 00011000100100011001010111111100 2.0 C1-12 [89,153,161,63] 10011010100110011000010111111100 2.0 C1-10 [24,153,160,61] 00011000100110010000010110111100 2.0 C1-9 [24,153,163,59] 00011000100110011100010111011100 2.0 NAME V BV DISTANCE ------- ------------------ --------------------------------- ---------- C1-7 [88,153,161,61] 00011010100110011000010110111100 2.0 C1-2 [26,152,165,63] 01011000000110011010010111111100 3.0 C1-3 [24,201,161,62] 00011000100100111000010101111100 3.0 C1-6 [24,153,171,55] 00011000100110011101010111101100 3.0 C1-8 [152,217,161,47] 00011001100110111000010111110100 3.0 C1-11 [25,185,161,47] 10011000100111011000010111110100 3.0 C1-13 [24,153,229,31] 00011000100110011010011111111000 3.0 21 rows selected.
- この反復では、類似検索で
HAMMING
距離メトリックが省略されています。ただし、BINARY
ベクトルで使用されるデフォルトのメトリックはHAMMING
であるため、結果は前の問合せと同じです。SELECT name, v, bv, VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1')) DISTANCE FROM genbvec ORDER BY VECTOR_DISTANCE(v, (SELECT v FROM genbvec WHERE name='C1')) FETCH EXACT FIRST 21 ROWS ONLY;
出力例:
NAME V BV DISTANCE ------- ------------------ --------------------------------- ---------- C1 [24,153,161,63] 00011000100110011000010111111100 0 C1-1 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-20 [16,153,161,63] 00001000100110011000010111111100 1.0 C1-19 [25,153,161,63] 10011000100110011000010111111100 1.0 C1-17 [24,185,161,63] 00011000100111011000010111111100 1.0 C1-15 [24,153,165,63] 00011000100110011010010111111100 1.0 C1-14 [24,152,161,63] 00011000000110011000010111111100 1.0 C1-5 [24,153,161,61] 00011000100110011000010110111100 1.0 C1-4 [24,187,161,63] 00011000110111011000010111111100 2.0 C1-18 [152,137,161,63] 00011001100100011000010111111100 2.0 C1-16 [24,137,169,63] 00011000100100011001010111111100 2.0 C1-12 [89,153,161,63] 10011010100110011000010111111100 2.0 C1-10 [24,153,160,61] 00011000100110010000010110111100 2.0 C1-9 [24,153,163,59] 00011000100110011100010111011100 2.0 NAME V BV DISTANCE ------- ------------------ --------------------------------- ---------- C1-7 [88,153,161,61] 00011010100110011000010110111100 2.0 C1-2 [26,152,165,63] 01011000000110011010010111111100 3.0 C1-3 [24,201,161,62] 00011000100100111000010101111100 3.0 C1-6 [24,153,171,55] 00011000100110011101010111101100 3.0 C1-8 [152,217,161,47] 00011001100110111000010111110100 3.0 C1-11 [25,185,161,47] 10011000100111011000010111110100 3.0 C1-13 [24,153,229,31] 00011000100110011010011111111000 3.0 21 rows selected.
親トピック: スタート・ガイド