This sample shows a crawl configuration of a file system data source with no manipulators and no filters.

Much of the configuration is specified as key/value pairs within moduleProperty elements. To determine the values for the configuration properties, run the getModuleSpec task of the cas-cmd utility.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample crawl configuration: one crawl (FileCrawl) that reads from the
  "File System" source module, with empty filter and manipulator lists.
  Module settings are given as key/value pairs in moduleProperty elements.
-->
<configurations xmlns="http://endeca.com/itl/cas/2011-12">
  <crawlConfig>
    <!-- Identifier of this crawl. -->
    <crawlId>
      <id>FileCrawl</id>
    </crawlId>
    <unavailableIncrementalSwitchesToFullCrawl>false</unavailableIncrementalSwitchesToFullCrawl>
    <crawlThreads>3</crawlThreads>

    <!-- Source side: the module to read from and its settings. -->
    <sourceConfig>
      <moduleId>
        <id>File System</id>
      </moduleId>
      <moduleProperties>
        <moduleProperty>
          <key>expandArchives</key>
          <value>false</value>
        </moduleProperty>
        <moduleProperty>
          <key>gatherNativeFileProperties</key>
          <value>true</value>
        </moduleProperty>
        <!--
          A single key may carry several values; three are listed here.
          NOTE(review): presumably these are the directories the crawl
          starts from; confirm against the module spec.
        -->
        <moduleProperty>
          <key>seeds</key>
          <value>C:\tmp\itldocset</value>
          <value>C:\tmp\iapdocset</value>
          <value>C:\tmp\mdexdocset</value>
        </moduleProperty>
      </moduleProperties>
      <!-- Both filter lists are intentionally empty in this sample. -->
      <excludeFilters/>
      <includeFilters/>
    </sourceConfig>

    <!-- Text extraction settings. NOTE(review): the unit of timeout is not stated here; confirm. -->
    <textExtractionConfig>
      <enabled>true</enabled>
      <makeLocalCopy>true</makeLocalCopy>
      <timeout>90</timeout>
    </textExtractionConfig>

    <!-- No manipulators are configured in this sample. -->
    <manipulatorConfigs/>

    <!-- Output side: the module to write with and its settings. -->
    <outputConfig>
      <moduleId>
        <id>File System</id>
      </moduleId>
      <moduleProperties>
        <moduleProperty>
          <key>outputXml</key>
          <value>true</value>
        </moduleProperty>
        <moduleProperty>
          <key>outputCompressed</key>
          <value>false</value>
        </moduleProperty>
        <moduleProperty>
          <key>outputPrefix</key>
          <value>CrawlerOutput</value>
        </moduleProperty>
        <moduleProperty>
          <key>outputDirectory</key>
          <value>C:\tmp</value>
        </moduleProperty>
      </moduleProperties>
    </outputConfig>
  </crawlConfig>
</configurations>


Copyright © Legal Notices