[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/search/classes/ -> document.php (source)

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Document representation.
  19   *
  20   * @package    core_search
  21   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace core_search;
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Represents a document to index.
  31   *
  32   * Note that, if you are writing a search engine and you want to change \core_search\document
  33   * behaviour, you can override this class; it will be automatically loaded from \search_YOURENGINE\document.
  34   *
  35   * @package    core_search
  36   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  37   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  38   */
  39  class document implements \renderable, \templatable {
  40  
  41      /**
  42       * @var array $data The document data.
  43       */
  44      protected $data = array();
  45  
  46      /**
  47       * @var array Extra data needed to render the document.
  48       */
  49      protected $extradata = array();
  50  
  51      /**
  52       * @var \moodle_url Link to the document.
  53       */
  54      protected $docurl = null;
  55  
  56      /**
  57       * @var \moodle_url Link to the document context.
  58       */
  59      protected $contexturl = null;
  60  
  61      /**
  62       * @var int|null The content field filearea.
  63       */
  64      protected $contentfilearea = null;
  65  
  66      /**
  67       * @var int|null The content field itemid.
  68       */
  69      protected $contentitemid = null;
  70  
  71      /**
  72       * @var bool Should be set to true if document hasn't been indexed before. False if unknown.
  73       */
  74      protected $isnew = false;
  75  
  76      /**
  77       * @var \stored_file[] An array of stored files to attach to the document.
  78       */
  79      protected $files = array();
  80  
  81      /**
  82       * All required fields any doc should contain.
  83       *
  84       * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
  85       * default search engine.
  86       *
  87       * Search engine plugins are responsible of setting their appropriate field types and map these naming to whatever format
  88       * they need.
  89       *
  90       * @var array
  91       */
  92      protected static $requiredfields = array(
  93          'id' => array(
  94              'type' => 'string',
  95              'stored' => true,
  96              'indexed' => false
  97          ),
  98          'itemid' => array(
  99              'type' => 'int',
 100              'stored' => true,
 101              'indexed' => true
 102          ),
 103          'title' => array(
 104              'type' => 'text',
 105              'stored' => true,
 106              'indexed' => true,
 107              'mainquery' => true
 108          ),
 109          'content' => array(
 110              'type' => 'text',
 111              'stored' => true,
 112              'indexed' => true,
 113              'mainquery' => true
 114          ),
 115          'contextid' => array(
 116              'type' => 'int',
 117              'stored' => true,
 118              'indexed' => true
 119          ),
 120          'areaid' => array(
 121              'type' => 'string',
 122              'stored' => true,
 123              'indexed' => true
 124          ),
 125          'type' => array(
 126              'type' => 'int',
 127              'stored' => true,
 128              'indexed' => true
 129          ),
 130          'courseid' => array(
 131              'type' => 'int',
 132              'stored' => true,
 133              'indexed' => true
 134          ),
 135          'owneruserid' => array(
 136              'type' => 'int',
 137              'stored' => true,
 138              'indexed' => true
 139          ),
 140          'modified' => array(
 141              'type' => 'tdate',
 142              'stored' => true,
 143              'indexed' => true
 144          ),
 145      );
 146  
 147      /**
 148       * All optional fields docs can contain.
 149       *
 150       * Although it matches solr fields format, this is just to define the field types. Search
 151       * engine plugins are responsible of setting their appropriate field types and map these
 152       * naming to whatever format they need.
 153       *
 154       * @var array
 155       */
 156      protected static $optionalfields = array(
 157          'userid' => array(
 158              'type' => 'int',
 159              'stored' => true,
 160              'indexed' => true
 161          ),
 162          'description1' => array(
 163              'type' => 'text',
 164              'stored' => true,
 165              'indexed' => true,
 166              'mainquery' => true
 167          ),
 168          'description2' => array(
 169              'type' => 'text',
 170              'stored' => true,
 171              'indexed' => true,
 172              'mainquery' => true
 173          )
 174      );
 175  
 176      /**
 177       * Any fields that are engine specifc. These are fields that are solely used by a search engine plugin
 178       * for internal purposes.
 179       *
 180       * Field names should be prefixed with engine name to avoid potential conflict with core fields.
 181       *
 182       * Uses same format as fields above.
 183       *
 184       * @var array
 185       */
 186      protected static $enginefields = array();
 187  
 188      /**
 189       * We ensure that the document has a unique id across search areas.
 190       *
 191       * @param int $itemid An id unique to the search area
 192       * @param string $componentname The search area component Frankenstyle name
 193       * @param string $areaname The area name (the search area class name)
 194       * @return void
 195       */
 196      public function __construct($itemid, $componentname, $areaname) {
 197  
 198          if (!is_numeric($itemid)) {
 199              throw new \coding_exception('The itemid should be an integer');
 200          }
 201  
 202          $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
 203          $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
 204          $this->data['itemid'] = intval($itemid);
 205      }
 206  
 207      /**
 208       * Add a stored file to the document.
 209       *
 210       * @param \stored_file|int $file The file to add, or file id.
 211       * @return void
 212       */
 213      public function add_stored_file($file) {
 214          if (is_numeric($file)) {
 215              $this->files[$file] = $file;
 216          } else {
 217              $this->files[$file->get_id()] = $file;
 218          }
 219      }
 220  
 221      /**
 222       * Returns the array of attached files.
 223       *
 224       * @return \stored_file[]
 225       */
 226      public function get_files() {
 227          // The files array can contain stored file ids, so we need to get instances if asked.
 228          foreach ($this->files as $id => $listfile) {
 229              if (is_numeric($listfile)) {
 230                  $fs = get_file_storage();
 231  
 232                  if ($file = $fs->get_file_by_id($id)) {
 233                      $this->files[$id] = $file;
 234                  }
 235              }
 236          }
 237  
 238          return $this->files;
 239      }
 240  
 241      /**
 242       * Setter.
 243       *
 244       * Basic checkings to prevent common issues.
 245       *
 246       * If the field is a string tags will be stripped, if it is an integer or a date it
 247       * will be casted to a PHP integer. tdate fields values are expected to be timestamps.
 248       *
 249       * @throws \coding_exception
 250       * @param string $fieldname The field name
 251       * @param string|int $value The value to store
 252       * @return string|int The stored value
 253       */
 254      public function set($fieldname, $value) {
 255  
 256          if (!empty(static::$requiredfields[$fieldname])) {
 257              $fielddata = static::$requiredfields[$fieldname];
 258          } else if (!empty(static::$optionalfields[$fieldname])) {
 259              $fielddata = static::$optionalfields[$fieldname];
 260          } else if (!empty(static::$enginefields[$fieldname])) {
 261              $fielddata = static::$enginefields[$fieldname];
 262          }
 263  
 264          if (empty($fielddata)) {
 265              throw new \coding_exception('"' . $fieldname . '" field does not exist.');
 266          }
 267  
 268          // tdate fields should be set as timestamps, later they might be converted to
 269          // a date format, it depends on the search engine.
 270          if (($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') && !is_numeric($value)) {
 271              throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' . $value . '"');
 272          }
 273  
 274          // We want to be strict here, there might be engines that expect us to
 275          // provide them data with the proper type already set.
 276          if ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') {
 277              $this->data[$fieldname] = intval($value);
 278          } else {
 279              // Replace all groups of line breaks and spaces by single spaces.
 280              $this->data[$fieldname] = preg_replace("/\s+/u", " ", $value);
 281          }
 282  
 283          return $this->data[$fieldname];
 284      }
 285  
 286      /**
 287       * Sets data to this->extradata
 288       *
 289       * This data can be retrieved using \core_search\document->get($fieldname).
 290       *
 291       * @param string $fieldname
 292       * @param string $value
 293       * @return void
 294       */
 295      public function set_extra($fieldname, $value) {
 296          $this->extradata[$fieldname] = $value;
 297      }
 298  
 299      /**
 300       * Getter.
 301       *
 302       * Use self::is_set if you are not sure if this field is set or not
 303       * as otherwise it will trigger a \coding_exception
 304       *
 305       * @throws \coding_exception
 306       * @param string $field
 307       * @return string|int
 308       */
 309      public function get($field) {
 310  
 311          if (isset($this->data[$field])) {
 312              return $this->data[$field];
 313          }
 314  
 315          // Fallback to extra data.
 316          if (isset($this->extradata[$field])) {
 317              return $this->extradata[$field];
 318          }
 319  
 320          throw new \coding_exception('Field "' . $field . '" is not set in the document');
 321      }
 322  
 323      /**
 324       * Checks if a field is set.
 325       *
 326       * @param string $field
 327       * @return bool
 328       */
 329      public function is_set($field) {
 330          return (isset($this->data[$field]) || isset($this->extradata[$field]));
 331      }
 332  
 333      /**
 334       * Set if this is a new document. False if unknown.
 335       *
 336       * @param bool $new
 337       */
 338      public function set_is_new($new) {
 339         $this->isnew = (bool)$new;
 340      }
 341  
 342      /**
 343       * Returns if the document is new. False if unknown.
 344       *
 345       * @return bool
 346       */
 347      public function get_is_new() {
 348         return $this->isnew;
 349      }
 350  
 351      /**
 352       * Returns all default fields definitions.
 353       *
 354       * @return array
 355       */
 356      public static function get_default_fields_definition() {
 357          return static::$requiredfields + static::$optionalfields + static::$enginefields;
 358      }
 359  
 360      /**
 361       * Formats the timestamp preparing the time fields to be inserted into the search engine.
 362       *
 363       * By default it just returns a timestamp so any search engine could just store integers
 364       * and use integers comparison to get documents between x and y timestamps, but search
 365       * engines might be interested in using their own field formats. They can do it extending
 366       * this class in \search_xxx\document.
 367       *
 368       * @param int $timestamp
 369       * @return string
 370       */
 371      public static function format_time_for_engine($timestamp) {
 372          return $timestamp;
 373      }
 374  
 375      /**
 376       * Formats a string value for the search engine.
 377       *
 378       * Search engines may overwrite this method to apply restrictions, like limiting the size.
 379       * The default behaviour is just returning the string.
 380       *
 381       * @param string $string
 382       * @return string
 383       */
 384      public static function format_string_for_engine($string) {
 385          return $string;
 386      }
 387  
 388      /**
 389       * Formats a text value for the search engine.
 390       *
 391       * Search engines may overwrite this method to apply restrictions, like limiting the size.
 392       * The default behaviour is just returning the string.
 393       *
 394       * @param string $text
 395       * @return string
 396       */
 397      public static function format_text_for_engine($text) {
 398          return $text;
 399      }
 400  
 401      /**
 402       * Returns a timestamp from the value stored in the search engine.
 403       *
 404       * By default it just returns a timestamp so any search engine could just store integers
 405       * and use integers comparison to get documents between x and y timestamps, but search
 406       * engines might be interested in using their own field formats. They should do it extending
 407       * this class in \search_xxx\document.
 408       *
 409       * @param string $time
 410       * @return int
 411       */
 412      public static function import_time_from_engine($time) {
 413          return $time;
 414      }
 415  
 416      /**
 417       * Returns how text is returned from the search engine.
 418       *
 419       * @return int
 420       */
 421      protected function get_text_format() {
 422          return FORMAT_PLAIN;
 423      }
 424  
 425      /**
 426       * Fills the document with data coming from the search engine.
 427       *
 428       * @throws \core_search\engine_exception
 429       * @param array $docdata
 430       * @return void
 431       */
 432      public function set_data_from_engine($docdata) {
 433          $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
 434          foreach ($fields as $fieldname => $field) {
 435  
 436              // Optional params might not be there.
 437              if (isset($docdata[$fieldname])) {
 438                  if ($field['type'] === 'tdate') {
 439                      // Time fields may need a preprocessing.
 440                      $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
 441                  } else {
 442                      // No way we can make this work if there is any multivalue field.
 443                      if (is_array($docdata[$fieldname])) {
 444                          throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
 445                      }
 446                      $this->set($fieldname, $docdata[$fieldname]);
 447                  }
 448              }
 449          }
 450      }
 451  
 452      /**
 453       * Sets the document url.
 454       *
 455       * @param \moodle_url $url
 456       * @return void
 457       */
 458      public function set_doc_url(\moodle_url $url) {
 459          $this->docurl = $url;
 460      }
 461  
 462      /**
 463       * Gets the url to the doc.
 464       *
 465       * @return \moodle_url
 466       */
 467      public function get_doc_url() {
 468          return $this->docurl;
 469      }
 470  
 471      public function set_context_url(\moodle_url $url) {
 472          $this->contexturl = $url;
 473      }
 474  
 475      /**
 476       * Gets the url to the context.
 477       *
 478       * @return \moodle_url
 479       */
 480      public function get_context_url() {
 481          return $this->contexturl;
 482      }
 483  
 484      /**
 485       * Returns the document ready to submit to the search engine.
 486       *
 487       * @throws \coding_exception
 488       * @return array
 489       */
 490      public function export_for_engine() {
 491          // Set any unset defaults.
 492          $this->apply_defaults();
 493  
 494          // We don't want to affect the document instance.
 495          $data = $this->data;
 496  
 497          // Apply specific engine-dependant formats and restrictions.
 498          foreach (static::$requiredfields as $fieldname => $field) {
 499  
 500              // We also check that we have everything we need.
 501              if (!isset($data[$fieldname])) {
 502                  throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
 503              }
 504  
 505              if ($field['type'] === 'tdate') {
 506                  // Overwrite the timestamp with the engine dependant format.
 507                  $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
 508              } else if ($field['type'] === 'string') {
 509                  // Overwrite the string with the engine dependant format.
 510                  $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
 511              } else if ($field['type'] === 'text') {
 512                  // Overwrite the text with the engine dependant format.
 513                  $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
 514              }
 515  
 516          }
 517  
 518          $fields = static::$optionalfields + static::$enginefields;
 519          foreach ($fields as $fieldname => $field) {
 520              if (!isset($data[$fieldname])) {
 521                  continue;
 522              }
 523              if ($field['type'] === 'tdate') {
 524                  // Overwrite the timestamp with the engine dependant format.
 525                  $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
 526              } else if ($field['type'] === 'string') {
 527                  // Overwrite the string with the engine dependant format.
 528                  $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
 529              } else if ($field['type'] === 'text') {
 530                  // Overwrite the text with the engine dependant format.
 531                  $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
 532              }
 533          }
 534  
 535          return $data;
 536      }
 537  
 538      /**
 539       * Apply any defaults to unset fields before export. Called after document building, but before export.
 540       *
 541       * Sub-classes of this should make sure to call parent::apply_defaults().
 542       */
 543      protected function apply_defaults() {
 544          // Set the default type, TYPE_TEXT.
 545          if (!isset($this->data['type'])) {
 546              $this->data['type'] = manager::TYPE_TEXT;
 547          }
 548      }
 549  
 550      /**
 551       * Export the document data to be used as a template context.
 552       *
 553       * Adding more info than the required one as people might be interested in extending the template.
 554       *
 555       * Although content is a required field when setting up the document, it accepts '' (empty) values
 556       * as they may be the result of striping out HTML.
 557       *
 558       * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed.
 559       * The renderer will output the content without any further cleaning.
 560       *
 561       * @param renderer_base $output The renderer.
 562       * @return array
 563       */
 564      public function export_for_template(\renderer_base $output) {
 565          list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($this->get('areaid'));
 566  
 567          $title = $this->is_set('title') ? $this->format_text($this->get('title')) : '';
 568          $data = [
 569              'componentname' => $componentname,
 570              'areaname' => $areaname,
 571              'courseurl' => course_get_url($this->get('courseid')),
 572              'coursefullname' => format_string($this->get('coursefullname'), true, array('context' => $this->get('contextid'))),
 573              'modified' => userdate($this->get('modified')),
 574              'title' => ($title !== '') ? $title : get_string('notitle', 'search'),
 575              'docurl' => $this->get_doc_url(),
 576              'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
 577              'contexturl' => $this->get_context_url(),
 578              'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null,
 579              'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null,
 580          ];
 581  
 582          // Now take any attached any files.
 583          $files = $this->get_files();
 584          if (!empty($files)) {
 585              if (count($files) > 1) {
 586                  $filenames = array();
 587                  foreach ($files as $file) {
 588                      $filenames[] = format_string($file->get_filename(), true, array('context' => $this->get('contextid')));
 589                  }
 590                  $data['multiplefiles'] = true;
 591                  $data['filenames'] = $filenames;
 592              } else {
 593                  $file = reset($files);
 594                  $data['filename'] = format_string($file->get_filename(), true, array('context' => $this->get('contextid')));
 595              }
 596          }
 597  
 598          if ($this->is_set('userid')) {
 599              $data['userurl'] = new \moodle_url('/user/view.php', array('id' => $this->get('userid'), 'course' => $this->get('courseid')));
 600              $data['userfullname'] = format_string($this->get('userfullname'), true, array('context' => $this->get('contextid')));
 601          }
 602  
 603          return $data;
 604      }
 605  
 606      /**
 607       * Formats a text string coming from the search engine.
 608       *
 609       * By default just return the text as it is:
 610       * - Search areas are responsible of sending just plain data, the search engine may
 611       *   append HTML or markdown to it (highlighing for example).
 612       * - The view is responsible of shortening the text if it is too big
 613       *
 614       * @param  string $text Text to format
 615       * @return string HTML text to be renderer
 616       */
 617      protected function format_text($text) {
 618          return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid')));
 619      }
 620  }


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1