%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /home1/lightco1/www/feiss.com.au/administrator/components/com_finder/helpers/indexer/
Upload File :
Create Path :
Current File : //home1/lightco1/www/feiss.com.au/administrator/components/com_finder/helpers/indexer/indexer.php

<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2016 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE
 */

defined('_JEXEC') or die;

JLoader::register('FinderIndexerHelper', __DIR__ . '/helper.php');
JLoader::register('FinderIndexerParser', __DIR__ . '/parser.php');
JLoader::register('FinderIndexerStemmer', __DIR__ . '/stemmer.php');
JLoader::register('FinderIndexerTaxonomy', __DIR__ . '/taxonomy.php');
JLoader::register('FinderIndexerToken', __DIR__ . '/token.php');

jimport('joomla.filesystem.file');

/**
 * Main indexer class for the Finder indexer package.
 *
 * The indexer class provides the core functionality of the Finder
 * search engine. It is responsible for adding and updating the
 * content links table; extracting and scoring tokens; and maintaining
 * all referential information for the content.
 *
 * Note: All exceptions thrown from within this class should be caught
 * by the controller.
 *
 * @since  2.5
 */
abstract class FinderIndexer
{
	/**
	 * The title context identifier.
	 *
	 * Used by getState() as the key of the weight derived from the
	 * 'title_multiplier' option.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const TITLE_CONTEXT = 1;

	/**
	 * The text context identifier.
	 *
	 * Used by getState() as the key of the weight derived from the
	 * 'text_multiplier' option.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const TEXT_CONTEXT = 2;

	/**
	 * The meta context identifier.
	 *
	 * Used by getState() as the key of the weight derived from the
	 * 'meta_multiplier' option.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const META_CONTEXT = 3;

	/**
	 * The path context identifier.
	 *
	 * Used by getState() as the key of the weight derived from the
	 * 'path_multiplier' option.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const PATH_CONTEXT = 4;

	/**
	 * The misc context identifier.
	 *
	 * Used by getState() as the key of the weight derived from the
	 * 'misc_multiplier' option.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	const MISC_CONTEXT = 5;

	/**
	 * The indexer state object.
	 *
	 * Acts as an in-request cache: getState() fills it on first use,
	 * setState() replaces it (and also writes the same object to the
	 * session under '_finder.state'), and resetState() clears it.
	 *
	 * @var    JObject
	 * @since  2.5
	 */
	public static $state;

	/**
	 * The indexer profiler object.
	 *
	 * Only assigned by getState(), and only when the application's
	 * 'debug' setting is truthy; otherwise it keeps its initial value.
	 *
	 * @var    JProfiler
	 * @since  2.5
	 */
	public static $profiler;

	/**
	 * Returns the FinderIndexer driver that matches the active database driver.
	 *
	 * The database driver name is normalised before the lookup: 'mysqli' and
	 * 'pdomysql' both map to the 'mysql' indexer driver, and 'sqlazure' maps
	 * to 'sqlsrv'. The driver is then loaded from driver/<name>.php next to
	 * this file and must define the class FinderIndexerDriver<Name>.
	 *
	 * @return  FinderIndexer instance based on the database driver
	 *
	 * @since   3.0
	 * @throws  RuntimeException if driver class for indexer not present.
	 */
	public static function getInstance()
	{
		// Setup the adapter for the indexer.
		$format = JFactory::getDbo()->name;

		// Several database drivers share a single indexer driver.
		if ($format === 'mysqli' || $format === 'pdomysql')
		{
			$format = 'mysql';
		}
		elseif ($format === 'sqlazure')
		{
			$format = 'sqlsrv';
		}

		$path = __DIR__ . '/driver/' . $format . '.php';
		$class = 'FinderIndexerDriver' . ucfirst($format);

		// Check if a driver file exists for the format.
		if (file_exists($path))
		{
			include_once $path;

			/*
			 * Only instantiate when the class is really available. A driver
			 * file that does not define the expected class would otherwise
			 * trigger a fatal error instead of the documented exception.
			 */
			if (class_exists($class))
			{
				return new $class;
			}
		}

		// Throw invalid driver exception.
		throw new RuntimeException(JText::sprintf('COM_FINDER_INDEXER_INVALID_DRIVER', $format));
	}

	/**
	 * Method to get the indexer state.
	 *
	 * Lookup order: the in-request cache (static::$state), then the session
	 * entry '_finder.state', and finally a freshly built default state based
	 * on the com_finder component parameters. Whenever the cache was empty,
	 * the profiler (debug mode only) and the stemmer are set up again before
	 * the state is cached and returned.
	 *
	 * @return  object  The indexer state object.
	 *
	 * @since   2.5
	 */
	public static function getState()
	{
		// A populated cache short-circuits everything else.
		if (!empty(static::$state))
		{
			return static::$state;
		}

		// Fall back to whatever the session currently holds.
		$state = JFactory::getSession()->get('_finder.state', null);

		// Nothing stored yet: build the defaults from the component configuration.
		if (empty($state))
		{
			$state = new JObject;
			$state->options = JComponentHelper::getParams('com_finder');

			// Context => (option name, fallback multiplier).
			$multipliers = array(
				self::TITLE_CONTEXT => array('title_multiplier', 1.7),
				self::TEXT_CONTEXT  => array('text_multiplier', 0.7),
				self::META_CONTEXT  => array('meta_multiplier', 1.2),
				self::PATH_CONTEXT  => array('path_multiplier', 2.0),
				self::MISC_CONTEXT  => array('misc_multiplier', 0.3)
			);

			$weights = array();

			foreach ($multipliers as $contextId => $setting)
			{
				$weights[$contextId] = round($state->options->get($setting[0], $setting[1]), 2);
			}

			$state->weights = $weights;

			// The indexing run starts now.
			$state->startTime = JFactory::getDate()->toSql();

			// Batch bookkeeping starts from scratch.
			$state->batchSize   = (int) $state->options->get('batch_size', 50);
			$state->batchOffset = 0;
			$state->totalItems  = 0;
			$state->pluginState = array();
		}

		// The profiler is only wired up while debugging.
		if (JFactory::getApplication()->get('debug'))
		{
			static::$profiler = JProfiler::getInstance('FinderIndexer');
		}

		// Stemming needs both the on/off switch and a stemmer name.
		if ($state->options->get('stem', 1))
		{
			$stemmerName = $state->options->get('stemmer', 'porter_en');

			if ($stemmerName)
			{
				FinderIndexerHelper::$stemmer = FinderIndexerStemmer::getInstance($stemmerName);
			}
		}

		// Cache the state for the rest of the request.
		static::$state = $state;

		return static::$state;
	}

	/**
	 * Method to set the indexer state.
	 *
	 * A usable state is stored twice: in the in-request cache and in the
	 * session under '_finder.state'. Anything that is empty or not a JObject
	 * is rejected and leaves both untouched.
	 *
	 * @param   object  $data  A new indexer state object.
	 *
	 * @return  boolean  True on success, false on failure.
	 *
	 * @since   2.5
	 */
	public static function setState($data)
	{
		// Only non-empty JObject instances are accepted as state.
		$accepted = !empty($data) && $data instanceof JObject;

		if ($accepted)
		{
			// Keep the in-request cache and the session in step.
			static::$state = $data;
			JFactory::getSession()->set('_finder.state', $data);
		}

		return $accepted;
	}

	/**
	 * Method to reset the indexer state.
	 *
	 * Clears both copies of the state: the in-request cache and the session
	 * entry '_finder.state'. The next getState() call therefore rebuilds the
	 * defaults from the component configuration.
	 *
	 * @return  void
	 *
	 * @since   2.5
	 */
	public static function resetState()
	{
		/*
		 * Reset the internal state to null. Late static binding is used so
		 * this clears the same property that getState() and setState() work
		 * with, even if a subclass redeclares $state.
		 */
		static::$state = null;

		// Reset the session state to null.
		JFactory::getSession()->set('_finder.state', null);
	}

	/**
	 * Method to index a content item.
	 *
	 * Declared abstract: every concrete indexer subclass has to provide
	 * its own implementation.
	 *
	 * @param   FinderIndexerResult  $item    The content item to index.
	 * @param   string               $format  The format of the content. Defaults to 'html'. [optional]
	 *
	 * @return  integer  The ID of the record in the links table.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	abstract public function index($item, $format = 'html');

	/**
	 * Method to remove a link from the index.
	 *
	 * Declared abstract: every concrete indexer subclass has to provide
	 * its own implementation.
	 *
	 * @param   integer  $linkId  The id of the link.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	abstract public function remove($linkId);

	/**
	 * Method to optimize the index. We use this method to remove unused terms
	 * and any other optimizations that might be necessary.
	 *
	 * Declared abstract: every concrete indexer subclass has to provide
	 * its own implementation.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	abstract public function optimize();

	/**
	 * Method to get a content item's signature.
	 *
	 * The signature is an MD5 hash over the serialized item together with
	 * the indexer settings that are read here (context weights, the stemming
	 * switch and the stemmer name), so a change to either the item or those
	 * settings yields a different signature.
	 *
	 * @param   object  $item  The content item to index.
	 *
	 * @return  string  The content item's signature.
	 *
	 * @since   2.5
	 */
	protected static function getSignature($item)
	{
		$indexerState = static::getState();
		$options      = $indexerState->options;

		// Settings that become part of the signature.
		$settings = array(
			$indexerState->weights,
			$options->get('stem', 1),
			$options->get('stemmer', 'porter_en')
		);

		$payload = array($item, $settings);

		return md5(serialize($payload));
	}

	/**
	 * Method to parse input, tokenize it, and then add it to the database.
	 *
	 * The input is handled in one of three ways:
	 *  - a resource is read in 2048 byte steps; each step is cut at its last
	 *    space and the remainder is carried over to the next step,
	 *  - a string longer than 2048 bytes is split into chunks of at most
	 *    2048 bytes, each cut at its last space where possible,
	 *  - any other non-empty input is parsed and tokenized in one go.
	 *
	 * In the two chunked modes toggleTables(false) is called whenever the
	 * running token count exceeds the 'memory_table_limit' option
	 * (default 30000).
	 *
	 * @param   mixed    $input    String or resource to use as input. A resource input will automatically be chunked to conserve
	 *                             memory. Strings will be chunked if longer than 2K in size.
	 * @param   integer  $context  The context of the input. See context constants.
	 * @param   string   $lang     The language of the input.
	 * @param   string   $format   The format of the input.
	 *
	 * @return  integer  The number of tokens extracted from the input.
	 *
	 * @since   2.5
	 */
	protected function tokenizeToDb($input, $context, $lang, $format)
	{
		$count = 0;
		$buffer = null;

		if (!empty($input))
		{
			// If the input is a resource, batch the process out.
			if (is_resource($input))
			{
				// Batch the process out to avoid memory limits.
				while (!feof($input))
				{
					// Read into the buffer.
					$buffer .= fread($input, 2048);

					/*
					 * If we haven't reached the end of the file, seek to the last
					 * space character and drop whatever is after that to make sure
					 * we didn't truncate a term while reading the input.
					 */
					if (!feof($input))
					{
						// Find the last space character.
						$ls = strrpos($buffer, ' ');

						// Adjust string based on the last space character.
						if ($ls)
						{
							// Truncate the string to the last space character.
							$string = substr($buffer, 0, $ls);

							// Adjust the buffer based on the last space for the next iteration and trim.
							$buffer = JString::trim(substr($buffer, $ls));
						}
						// No usable space character was found.
						else
						{
							$string = $buffer;

							/*
							 * The whole buffer is handed off for processing, so it
							 * has to be emptied. Otherwise the next read would be
							 * appended to text that was already tokenized and that
							 * text would be indexed a second time.
							 */
							$buffer = '';
						}
					}
					// We've reached the end of the file, so parse whatever remains.
					else
					{
						$string = $buffer;
					}

					// Parse the input.
					$string = FinderIndexerHelper::parse($string, $format);

					// Check the input.
					if (empty($string))
					{
						continue;
					}

					// Tokenize the input.
					$tokens = FinderIndexerHelper::tokenize($string, $lang);

					// Add the tokens to the database.
					$count += $this->addTokensToDb($tokens, $context);

					// Check if we're approaching the memory limit of the token table.
					if ($count > static::$state->options->get('memory_table_limit', 30000))
					{
						$this->toggleTables(false);
					}

					unset($string);
					unset($tokens);
				}
			}
			// If the input is greater than 2K in size, it is more efficient to
			// batch out the operation into smaller chunks of work.
			elseif (strlen($input) > 2048)
			{
				$start = 0;
				$end = strlen($input);
				$chunk = 2048;

				/*
				 * As it turns out, the complex regular expressions we use for
				 * sanitizing input are not very efficient when given large
				 * strings. It is much faster to process lots of short strings.
				 */
				while ($start < $end)
				{
					// Setup the string.
					$string = substr($input, $start, $chunk);

					// Find the last space character if we aren't at the end.
					$ls = (($start + $chunk) < $end ? strrpos($string, ' ') : false);

					// Truncate to the last space character.
					if ($ls !== false)
					{
						$string = substr($string, 0, $ls);
					}

					// Continue right after the space we cut at, or after the full chunk.
					$start += ($ls !== false ? $ls + 1 : $chunk);

					// Parse the input.
					$string = FinderIndexerHelper::parse($string, $format);

					// Check the input.
					if (empty($string))
					{
						continue;
					}

					// Tokenize the input.
					$tokens = FinderIndexerHelper::tokenize($string, $lang);

					// Add the tokens to the database.
					$count += $this->addTokensToDb($tokens, $context);

					// Check if we're approaching the memory limit of the token table.
					if ($count > static::$state->options->get('memory_table_limit', 30000))
					{
						$this->toggleTables(false);
					}
				}
			}
			else
			{
				// Parse the input.
				$input = FinderIndexerHelper::parse($input, $format);

				// Check the input.
				if (empty($input))
				{
					return $count;
				}

				// Tokenize the input.
				$tokens = FinderIndexerHelper::tokenize($input, $lang);

				// Add the tokens to the database.
				$count = $this->addTokensToDb($tokens, $context);
			}
		}

		return $count;
	}

	/**
	 * Method to add a set of tokens to the database.
	 *
	 * Declared abstract: every concrete indexer subclass has to provide
	 * its own implementation. tokenizeToDb() calls it once per processed
	 * chunk and adds the returned number to its running token count.
	 *
	 * @param   mixed  $tokens   An array or single FinderIndexerToken object.
	 * @param   mixed  $context  The context of the tokens. See context constants. [optional]
	 *
	 * @return  integer  The number of tokens inserted into the database.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	abstract protected function addTokensToDb($tokens, $context = '');

	/**
	 * Method to switch the token tables from Memory tables to MyISAM tables
	 * when they are close to running out of memory.
	 *
	 * Declared abstract: every concrete indexer subclass has to provide
	 * its own implementation. tokenizeToDb() calls it with false once its
	 * running token count exceeds the 'memory_table_limit' option.
	 *
	 * @param   boolean  $memory  Flag to control how they should be toggled.
	 *
	 * @return  boolean  True on success.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	abstract protected function toggleTables($memory);
}

Zerion Mini Shell 1.0