How CMS crawls handle multiple pieces of content

Some CMS repositories support items with multiple pieces of content. In these cases, the IAS Server outputs one record for the item itself and a separate record for each piece of content.

For example, an item from the Documentum Content Server repository could contain an attached PDF and an Excel file.

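After the crawl, the record for each piece of content contains the same Endeca.CMS.ItemId as the record for the item, plus an Endeca.CMS.ContentPieceId property that identifies the individual piece of content, as shown in the example below.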

Example of generated records for items with multiple pieces of content

This example shows a portion of the output for two records. The first is the root document, which has two pieces of attached content; the second is the first of the attached pieces. The Endeca.Id property is produced by concatenating the Endeca.CMS.RepositoryId with the Endeca.CMS.ItemId, plus the child record's Endeca.CMS.ContentPieceId (if present), using a colon as a delimiter (see the Endeca.Id property of each record in the example):

<?xml version="1.0" encoding="UTF-8"?>
<RECORDS>
 <RECORD>
    <PROP NAME="Endeca.Action">
       <PVAL>UPSERT</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.ContentLength">
       <PVAL>0</PVAL>
    </PROP>
 
    ...

    <PROP NAME="Endeca.CMS.Name">
       <PVAL>doc_with_attachment</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.NumContentPieces">
       <PVAL>2</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.RepositoryId">
       <PVAL>discussion</PVAL>
    </PROP>

    ...

    <PROP NAME="Endeca.Id">
       <PVAL>discussion:doc_with_attachment</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.ItemId">
       <PVAL>doc_with_attachment</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.RepositoryType">
       <PVAL>Documentum Content Server</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.RepositoryVersion">
       <PVAL>release 6.5</PVAL>
    </PROP>
    <PROP NAME="Endeca.SourceType">
       <PVAL>CMS</PVAL>
    </PROP>
    <PROP NAME="Endeca.SourceId">
      <PVAL>DocumentumSource</PVAL>
    </PROP> 
 </RECORD>
 <RECORD>
    <PROP NAME="Endeca.Action">
       <PVAL>UPSERT</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.ContentLength">
       <PVAL>54699</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.ContentPieceId">
       <PVAL>Attached.pdf</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.RepositoryId">
       <PVAL>discussion</PVAL>
    </PROP>
     
    ...

    <PROP NAME="Endeca.Id">
       <PVAL>discussion:doc_with_attachment:Attached.pdf</PVAL>
    </PROP>

    <PROP NAME="Endeca.CMS.IsFolder">
       <PVAL>false</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.ItemId">
       <PVAL>doc_with_attachment</PVAL>
    </PROP>
    <PROP NAME="Endeca.CMS.MimeType">
       <PVAL>application/pdf</PVAL>
    </PROP>

    ...
     
    <PROP NAME="Endeca.Document.Type">
       <PVAL>Adobe Acrobat (PDF)</PVAL>
    </PROP>
    <PROP NAME="Endeca.File.Size">
       <PVAL>54699</PVAL>
    </PROP>
    <PROP NAME="Endeca.SourceType">
       <PVAL>CMS</PVAL>
    </PROP>
    <PROP NAME="Endeca.SourceId">
       <PVAL>DocumentumSource</PVAL>
    </PROP>
 </RECORD>

...

</RECORDS>