This fetch script copies the incremental crawl output files to the directory used for a partial update. The complete script is shown below, with inline comments indicating the action performed at each step.
The script does not actually perform the partial update itself; that update operation is managed by scripts in the AppConfig.xml document.
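For orientation, a partial update script defined in the same AppConfig.xml document would typically be the consumer of the data staged by this fetch script. The fragment below is only a minimal, hypothetical sketch of such a script element: the id PartialUpdate, the log messages, and the PartialForge.run() call are assumptions made for illustration, not names taken from this example or from any particular product configuration. The fetch script itself follows.

<script id="PartialUpdate">
  <![CDATA[
    log.info("Starting partial update.");

    // The fetch script is expected to have staged the incremental
    // crawl output in PartialForge's incoming data directory and to
    // have set the "partial_extract::[filename]" flags before this
    // script runs.
    PartialForge.run();   // assumption: the Forge component exposes run()

    log.info("Partial update finished.");
  ]]>
</script>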
<script id="fetchIncrementalCasCrawlData"> <![CDATA[ log.info("Fetching incremental CAS crawl data for processing.");
// try to acquire a lock on the complete crawl data directory // for up to 10 minutes if (LockManager.acquireLockBlocking("complete_cas_crawl_data_lock", 600))
incrSrcDir = PathUtils.getAbsolutePath(CAS.getWorkingDir(), CAS.getCaCrawlIncrementalOutputDestDir()) + "/\\*";
incrDestDir = PathUtils.getAbsolutePath(PartialForge.getWorkingDir(), PartialForge.getIncomingDataDir());
// copy incremental crawl data crawlDataCopy = new CopyUtility(PartialForge.getAppName(), PartialForge.getEacHost(), PartialForge.getEacPort(), PartialForge.isSslEnabled()); crawlDataCopy.init("copy_complete_cas_incremental_crawl_data", CAS.getFsCrawlOutputDestHost(),PartialForge.getHostId(), incrSrcDir, incrDestDir, true); crawlDataCopy.run();
// (re)set flags indicating which partial update files are ready // for processing -- convention is "partial_extract::[filename]" fileUtil = new FileUtility(PartialForge.getAppName(), PartialForge.getEacHost(), PartialForge.getEacPort(), PartialForge.isSslEnabled()); dirContents = fileUtil.getDirContents(incrDestDir, PartialForge.getHostId()); for (file : dirContents.keySet()) { fileName = PathUtils.getFileNameFromPath(file); LockManager.setFlag("partial_extract::" + fileName); }
// release lock on the crawl data directory LockManager.releaseLock("complete_cas_crawl_data_lock"); ... log.info("Crawl data fetch script finished.");
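The ellipsis in the listing marks code omitted from this example. One case the omitted portion would typically cover is failure to acquire the lock within the ten-minute timeout. The fragment below is a minimal sketch of such a branch, assuming the same log and LockManager objects used above; the warning text is illustrative.

    else {
      // the crawl data lock could not be acquired within 10 minutes;
      // skip the fetch rather than copy files while another process
      // holds the complete crawl data directory
      log.warning("Unable to acquire complete_cas_crawl_data_lock; "
          + "skipping incremental crawl data fetch.");
    }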