# analyze_document.py

# This is an automatically generated code sample.
# To make this code sample work in your Oracle Cloud tenancy,
# please replace the values for any parameters whose current values do not fit
# your use case (such as resource IDs, strings containing ‘EXAMPLE’ or ‘unique_id’, and
# boolean, number, and enum parameters with values not fitting your use case).

import oci

# Load SDK settings from the DEFAULT profile of the config file in its
# default location. The file format is described at
# https://docs.cloud.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm#SDK_and_CLI_Configuration_File
config = oci.config.from_file()

# Service client built from that configuration.
ai_document_client = oci.ai_document.AIServiceDocumentClient(config)

# Short alias for the model namespace; every request object below comes
# from it.
models = oci.ai_document.models


def _single_vertex_polygon(x, y):
    """Return a BoundingPolygon holding one NormalizedVertex at (x, y)."""
    return models.BoundingPolygon(
        normalized_vertices=[models.NormalizedVertex(x=x, y=y)])


# --- What to extract, where to read it from, where to write results ------
text_extraction_feature = models.DocumentTextExtractionFeature(
    feature_type="TEXT_EXTRACTION",
    generate_searchable_pdf=False,
    model_id="ocid1.test.oc1..<unique_ID>EXAMPLE-modelId-Value",
    selection_mark_detection=False,
)

source_document = models.ObjectStorageDocumentDetails(
    source="OBJECT_STORAGE",
    namespace_name="EXAMPLE-namespaceName-Value",
    bucket_name="EXAMPLE-bucketName-Value",
    object_name="EXAMPLE-objectName-Value",
    page_range=["EXAMPLE--Value"],
)

result_location = models.OutputLocation(
    namespace_name="EXAMPLE-namespaceName-Value",
    bucket_name="EXAMPLE-bucketName-Value",
    prefix="EXAMPLE-prefix-Value",
)

# --- Contents of the single example page passed in as OCR data -----------
page_words = [
    models.Word(
        text="EXAMPLE-text-Value",
        confidence=0.8342287,
        bounding_polygon=_single_vertex_polygon(0.25969464, 0.46758157),
    ),
]

page_lines = [
    models.Line(
        text="EXAMPLE-text-Value",
        confidence=0.010752916,
        bounding_polygon=_single_vertex_polygon(0.105644405, 0.36052614),
        word_indexes=[39],
    ),
]

header_cell = models.Cell(
    text="EXAMPLE-text-Value",
    row_index=61,
    column_index=869,
    confidence=0.678442,
    bounding_polygon=_single_vertex_polygon(0.79856807, 0.82585025),
    word_indexes=[800],
)

page_tables = [
    models.Table(
        row_count=971,
        column_count=744,
        header_rows=[models.TableRow(cells=[header_cell])],
        confidence=0.25737917,
        bounding_polygon=_single_vertex_polygon(0.7323424, 0.018351376),
    ),
]

key_value_field = models.DocumentField(
    field_type="KEY_VALUE",
    field_value=models.ValueArray(
        value_type="ARRAY",
        confidence=0.12567782,
        bounding_polygon=_single_vertex_polygon(0.8541965, 0.14713478),
        word_indexes=[380],
        text="EXAMPLE-text-Value",
        normalized_value="EXAMPLE-normalizedValue-Value",
        normalized_confidence=0.9049306,
    ),
    field_label=models.FieldLabel(
        name="EXAMPLE-name-Value",
        confidence=0.23141193,
    ),
    field_name=models.FieldName(
        name="EXAMPLE-name-Value",
        confidence=0.53373486,
        bounding_polygon=_single_vertex_polygon(0.15109724, 0.02838248),
        word_indexes=[702],
    ),
)

page_signatures = [
    models.Signature(
        confidence=0.19143075,
        bounding_polygon=_single_vertex_polygon(0.33537287, 0.66658884),
    ),
]

page_bar_codes = [
    models.BarCode(
        confidence=0.9056299,
        value="EXAMPLE-value-Value",
        bounding_polygon=_single_vertex_polygon(0.33312047, 0.43781954),
        code_type="EXAMPLE-codeType-Value",
    ),
]

page_selection_marks = [
    models.SelectionMark(
        state="SELECTED",
        confidence=0.8500111,
        bounding_polygon=_single_vertex_polygon(0.8238592, 0.9825451),
    ),
]

example_page = models.Page(
    page_number=942,
    dimensions=models.Dimensions(
        width=7658.252,
        height=9447.96,
        unit="PIXEL",
    ),
    detected_document_types=[
        models.DetectedDocumentType(
            document_type="EXAMPLE-documentType-Value",
            confidence=0.45726037,
            document_id="ocid1.test.oc1..<unique_ID>EXAMPLE-documentId-Value",
        ),
    ],
    detected_languages=[
        models.DetectedLanguage(
            language="EXAMPLE-language-Value",
            confidence=0.38078272,
        ),
    ],
    words=page_words,
    lines=page_lines,
    tables=page_tables,
    document_fields=[key_value_field],
    signatures=page_signatures,
    bar_codes=page_bar_codes,
    selection_marks=page_selection_marks,
)

# --- Document-level OCR data wrapping that page --------------------------
ocr_result = models.AnalyzeDocumentResult(
    document_metadata=models.DocumentMetadata(
        page_count=655,
        mime_type="EXAMPLE-mimeType-Value",
    ),
    pages=[example_page],
    detected_document_types=[
        models.DetectedDocumentType(
            document_type="EXAMPLE-documentType-Value",
            confidence=0.73039603,
            document_id="ocid1.test.oc1..<unique_ID>EXAMPLE-documentId-Value",
        ),
    ],
    detected_languages=[
        models.DetectedLanguage(
            language="EXAMPLE-language-Value",
            confidence=0.8732025,
        ),
    ],
    document_classification_model_version="EXAMPLE-documentClassificationModelVersion-Value",
    language_classification_model_version="EXAMPLE-languageClassificationModelVersion-Value",
    text_extraction_model_version="EXAMPLE-textExtractionModelVersion-Value",
    key_value_extraction_model_version="EXAMPLE-keyValueExtractionModelVersion-Value",
    table_extraction_model_version="EXAMPLE-tableExtractionModelVersion-Value",
    signature_extraction_model_version="EXAMPLE-signatureExtractionModelVersion-Value",
    bar_code_extraction_model_version="EXAMPLE-barCodeExtractionModelVersion-Value",
    errors=[
        models.ProcessingError(
            code="EXAMPLE-code-Value",
            message="EXAMPLE-message-Value",
        ),
    ],
    searchable_pdf="O0GEO6c7KPdimpz7uXmI",
)

# --- Assemble the request payload ----------------------------------------
request_details = models.AnalyzeDocumentDetails(
    features=[text_extraction_feature],
    document=source_document,
    compartment_id="ocid1.test.oc1..<unique_ID>EXAMPLE-compartmentId-Value",
    output_location=result_location,
    language="EXAMPLE-language-Value",
    document_type="HEALTH_INSURANCE_ID",
    ocr_data=ocr_result,
)

# Send the request to the service. Not every parameter shown here is
# required; see the API documentation for details.
analyze_document_response = ai_document_client.analyze_document(
    analyze_document_details=request_details,
    if_match="EXAMPLE-ifMatch-Value",
    opc_request_id="5PGMWEXVTF3EFDHKF7FV<unique_ID>",
)

# Show the payload returned by the service.
print(analyze_document_response.data)
# Oracle Cloud Infrastructure Documentation