[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Document representation.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace core_search;

defined('MOODLE_INTERNAL') || die();

/**
 * Represents a document to index.
 *
 * Note that, if you are writing a search engine and you want to change \core_search\document
 * behaviour, you can overwrite this class; it will be automatically loaded from \search_YOURENGINE\document.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class document implements \renderable, \templatable {

    /**
     * @var array $data The document data.
     */
    protected $data = array();

    /**
     * @var array Extra data needed to render the document.
     */
    protected $extradata = array();

    /**
     * @var \moodle_url Link to the document.
     */
    protected $docurl = null;

    /**
     * @var \moodle_url Link to the document context.
     */
    protected $contexturl = null;

    /**
     * @var int|null The content field filearea.
     */
    protected $contentfilearea = null;

    /**
     * @var int|null The content field itemid.
     */
    protected $contentitemid = null;

    /**
     * @var bool Should be set to true if document hasn't been indexed before. False if unknown.
     */
    protected $isnew = false;

    /**
     * @var \stored_file[] An array of stored files to attach to the document.
     */
    protected $files = array();

    /**
     * All required fields any doc should contain.
     *
     * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
     * default search engine.
     *
     * Search engine plugins are responsible of setting their appropriate field types and map these naming to whatever format
     * they need.
     *
     * @var array
     */
    protected static $requiredfields = array(
        'id' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => false
        ),
        'itemid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'title' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'content' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'contextid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'areaid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'type' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'courseid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'owneruserid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'modified' => array(
            'type' => 'tdate',
            'stored' => true,
            'indexed' => true
        ),
    );

    /**
     * All optional fields docs can contain.
     *
     * Although it matches solr fields format, this is just to define the field types. Search
     * engine plugins are responsible of setting their appropriate field types and map these
     * naming to whatever format they need.
     *
     * @var array
     */
    protected static $optionalfields = array(
        'userid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'description1' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'description2' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * Field names should be prefixed with engine name to avoid potential conflict with core fields.
     *
     * Uses same format as fields above.
     *
     * @var array
     */
    protected static $enginefields = array();

    /**
     * We ensure that the document has a unique id across search areas.
     *
     * The document id is built as "<areaid>-<itemid>".
     *
     * @throws \coding_exception If $itemid is not numeric.
     * @param int $itemid An id unique to the search area
     * @param string $componentname The search area component Frankenstyle name
     * @param string $areaname The area name (the search area class name)
     * @return void
     */
    public function __construct($itemid, $componentname, $areaname) {

        if (!is_numeric($itemid)) {
            throw new \coding_exception('The itemid should be an integer');
        }

        $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
        $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
        $this->data['itemid'] = intval($itemid);
    }

    /**
     * Add a stored file to the document.
     *
     * A numeric value is kept as a bare file id and only resolved to an instance by get_files().
     *
     * @param \stored_file|int $file The file to add, or file id.
     * @return void
     */
    public function add_stored_file($file) {
        if (is_numeric($file)) {
            $this->files[$file] = $file;
        } else {
            $this->files[$file->get_id()] = $file;
        }
    }

    /**
     * Returns the array of attached files.
     *
     * Entries that were added as bare file ids are resolved to instances here. Ids that no longer
     * resolve to a file are dropped, so every returned element can safely be used as a file instance.
     *
     * @return \stored_file[]
     */
    public function get_files() {
        // Only fetched if there is at least one id to resolve.
        $fs = null;

        // The files array can contain stored file ids, so we need to get instances if asked.
        foreach ($this->files as $id => $listfile) {
            if (!is_numeric($listfile)) {
                continue;
            }

            if ($fs === null) {
                $fs = get_file_storage();
            }

            $file = $fs->get_file_by_id($id);
            if ($file) {
                $this->files[$id] = $file;
            } else {
                // The id does not point to an existing file. Leaving the raw id in the list would make
                // callers such as export_for_template() call methods on an integer.
                unset($this->files[$id]);
            }
        }

        return $this->files;
    }

    /**
     * Setter.
     *
     * Basic checks to prevent common issues.
     *
     * Integer and date (tdate) values must be numeric and are cast to a PHP integer; tdate values are
     * expected to be timestamps. For any other field type every run of whitespace (including line
     * breaks) is collapsed into a single space.
     *
     * @throws \coding_exception If the field is not defined, or a non-numeric value is given for an int/tdate field.
     * @param string $fieldname The field name
     * @param string|int $value The value to store
     * @return string|int The stored value
     */
    public function set($fieldname, $value) {

        // Look the field up in required, then optional, then engine specific definitions.
        $fielddata = null;
        if (!empty(static::$requiredfields[$fieldname])) {
            $fielddata = static::$requiredfields[$fieldname];
        } else if (!empty(static::$optionalfields[$fieldname])) {
            $fielddata = static::$optionalfields[$fieldname];
        } else if (!empty(static::$enginefields[$fieldname])) {
            $fielddata = static::$enginefields[$fieldname];
        }

        if (empty($fielddata)) {
            throw new \coding_exception('"' . $fieldname . '" field does not exist.');
        }

        // tdate fields should be set as timestamps, later they might be converted to
        // a date format, it depends on the search engine.
        $isinteger = ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate');

        if ($isinteger && !is_numeric($value)) {
            // Arrays and objects without __toString() can not be concatenated into the message,
            // report their type instead so the coding exception is what the caller actually gets.
            if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                $printable = gettype($value);
            } else {
                $printable = $value;
            }
            throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' .
                $printable . '"');
        }

        // We want to be strict here, there might be engines that expect us to
        // provide them data with the proper type already set.
        if ($isinteger) {
            $this->data[$fieldname] = intval($value);
        } else {
            // Replace all groups of line breaks and spaces by single spaces.
            // NOTE(review): with the "u" modifier preg_replace() returns null when $value is not valid
            // UTF-8, so null is what gets stored in that case - confirm this is acceptable for callers.
            $this->data[$fieldname] = preg_replace("/\s+/u", " ", $value);
        }

        return $this->data[$fieldname];
    }

    /**
     * Sets data to this->extradata
     *
     * This data can be retrieved using \core_search\document->get($fieldname).
     * No validation is applied to the field name nor to the value.
     *
     * @param string $fieldname
     * @param string $value
     * @return void
     */
    public function set_extra($fieldname, $value) {
        $this->extradata[$fieldname] = $value;
    }

    /**
     * Getter.
     *
     * Document data takes precedence over extra data when both define the same field.
     *
     * Use self::is_set if you are not sure if this field is set or not
     * as otherwise it will trigger a \coding_exception
     *
     * @throws \coding_exception If the field is set neither in the document data nor in the extra data.
     * @param string $field
     * @return string|int
     */
    public function get($field) {

        if (isset($this->data[$field])) {
            return $this->data[$field];
        }

        // Fallback to extra data.
        if (isset($this->extradata[$field])) {
            return $this->extradata[$field];
        }

        throw new \coding_exception('Field "' . $field . '" is not set in the document');
    }

    /**
     * Checks if a field is set, either in the document data or in the extra data.
     *
     * @param string $field
     * @return bool
     */
    public function is_set($field) {
        return (isset($this->data[$field]) || isset($this->extradata[$field]));
    }

    /**
     * Set if this is a new document. False if unknown.
     *
     * @param bool $new
     */
    public function set_is_new($new) {
        $this->isnew = (bool)$new;
    }

    /**
     * Returns if the document is new. False if unknown.
     *
     * @return bool
     */
    public function get_is_new() {
        return $this->isnew;
    }

    /**
     * Returns all default fields definitions.
     *
     * Required, optional and engine specific definitions are merged in that order; on a name clash
     * the first definition found wins.
     *
     * @return array
     */
    public static function get_default_fields_definition() {
        return static::$requiredfields + static::$optionalfields + static::$enginefields;
    }

    /**
     * Formats the timestamp preparing the time fields to be inserted into the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They can do it extending
     * this class in \search_xxx\document.
     *
     * @param int $timestamp
     * @return int|string The given timestamp, unchanged, in this default implementation.
     */
    public static function format_time_for_engine($timestamp) {
        return $timestamp;
    }

    /**
     * Formats a string value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        return $string;
    }

    /**
     * Formats a text value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $text
     * @return string
     */
    public static function format_text_for_engine($text) {
        return $text;
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They should do it extending
     * this class in \search_xxx\document.
     *
     * @param string $time
     * @return int The given value, unchanged, in this default implementation.
     */
    public static function import_time_from_engine($time) {
        return $time;
    }

    /**
     * Returns how text is returned from the search engine.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_PLAIN;
    }

    /**
     * Fills the document with data coming from the search engine.
     *
     * Only known fields (required, optional and engine specific) are read from $docdata and
     * each of them goes through self::set().
     *
     * @throws \core_search\engine_exception If a non-date field comes as an array (multivalued field).
     * @param array $docdata
     * @return void
     */
    public function set_data_from_engine($docdata) {
        $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {

            // Optional params might not be there.
            if (isset($docdata[$fieldname])) {
                if ($field['type'] === 'tdate') {
                    // Time fields may need a preprocessing.
                    $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
                } else {
                    // No way we can make this work if there is any multivalue field.
                    if (is_array($docdata[$fieldname])) {
                        throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
                    }
                    $this->set($fieldname, $docdata[$fieldname]);
                }
            }
        }
    }

    /**
     * Sets the document url.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_doc_url(\moodle_url $url) {
        $this->docurl = $url;
    }

    /**
     * Gets the url to the doc.
     *
     * @return \moodle_url
     */
    public function get_doc_url() {
        return $this->docurl;
    }

    /**
     * Sets the url to the document context.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_context_url(\moodle_url $url) {
        $this->contexturl = $url;
    }

    /**
     * Gets the url to the context.
     *
     * @return \moodle_url
     */
    public function get_context_url() {
        return $this->contexturl;
    }

    /**
     * Returns the document ready to submit to the search engine.
     *
     * Defaults are applied first, then every set field is passed through the engine formatter
     * matching its type. The document instance data is not altered by the formatting.
     *
     * @throws \coding_exception If any required field is missing.
     * @return array
     */
    public function export_for_engine() {
        // Set any unset defaults.
        $this->apply_defaults();

        // We don't want to affect the document instance.
        $data = $this->data;

        // Apply specific engine-dependant formats and restrictions.
        foreach (static::$requiredfields as $fieldname => $field) {

            // We also check that we have everything we need.
            if (!isset($data[$fieldname])) {
                throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
            }

            $data[$fieldname] = $this->format_field_for_engine($field['type'], $data[$fieldname]);
        }

        // Optional and engine fields get the same formatting, but only if they have been set.
        $fields = static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {
            if (!isset($data[$fieldname])) {
                continue;
            }

            $data[$fieldname] = $this->format_field_for_engine($field['type'], $data[$fieldname]);
        }

        return $data;
    }

    /**
     * Passes a value through the engine formatter that matches its field type.
     *
     * The formatters are called through static:: so engine specific overrides are used.
     * Values whose type has no formatter (e.g. int) are returned untouched.
     *
     * @param string $fieldtype The field type as set in the fields definition (tdate, string, text...)
     * @param mixed $value The value currently stored in the document
     * @return mixed The value in the engine dependant format
     */
    private function format_field_for_engine($fieldtype, $value) {
        if ($fieldtype === 'tdate') {
            // Overwrite the timestamp with the engine dependant format.
            return static::format_time_for_engine($value);
        }

        if ($fieldtype === 'string') {
            // Overwrite the string with the engine dependant format.
            return static::format_string_for_engine($value);
        }

        if ($fieldtype === 'text') {
            // Overwrite the text with the engine dependant format.
            return static::format_text_for_engine($value);
        }

        return $value;
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        // Set the default type, TYPE_TEXT.
        if (!isset($this->data['type'])) {
            $this->data['type'] = manager::TYPE_TEXT;
        }
    }

    /**
     * Export the document data to be used as a template context.
     *
     * Adding more info than the required one as people might be interested in extending the template.
     *
     * Although content is a required field when setting up the document, it accepts '' (empty) values
     * as they may be the result of stripping out HTML.
     *
     * The areaid, courseid, coursefullname, modified and contextid fields are read with self::get(), which
     * throws if they are not set. The same applies to userfullname when userid is set. coursefullname and
     * userfullname are not part of the core field definitions, so they are presumably provided through
     * self::set_extra().
     *
     * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed.
     * The renderer will output the content without any further cleaning.
     *
     * @throws \coding_exception If any of the fields read with self::get() is not set.
     * @param \renderer_base $output The renderer.
     * @return array
     */
    public function export_for_template(\renderer_base $output) {
        list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($this->get('areaid'));

        $title = $this->is_set('title') ? $this->format_text($this->get('title')) : '';
        $data = [
            'componentname' => $componentname,
            'areaname' => $areaname,
            'courseurl' => course_get_url($this->get('courseid')),
            'coursefullname' => format_string($this->get('coursefullname'), true, array('context' => $this->get('contextid'))),
            'modified' => userdate($this->get('modified')),
            'title' => ($title !== '') ? $title : get_string('notitle', 'search'),
            'docurl' => $this->get_doc_url(),
            'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
            'contexturl' => $this->get_context_url(),
            'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null,
            'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null,
        ];

        // Now add any attached files.
        $files = $this->get_files();
        if (!empty($files)) {
            if (count($files) > 1) {
                $filenames = array();
                foreach ($files as $file) {
                    $filenames[] = format_string($file->get_filename(), true, array('context' => $this->get('contextid')));
                }
                $data['multiplefiles'] = true;
                $data['filenames'] = $filenames;
            } else {
                $file = reset($files);
                $data['filename'] = format_string($file->get_filename(), true, array('context' => $this->get('contextid')));
            }
        }

        if ($this->is_set('userid')) {
            $data['userurl'] = new \moodle_url('/user/view.php', array('id' => $this->get('userid'), 'course' => $this->get('courseid')));
            $data['userfullname'] = format_string($this->get('userfullname'), true, array('context' => $this->get('contextid')));
        }

        return $data;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * By default just return the text as it is:
     * - Search areas are responsible of sending just plain data, the search engine may
     *   append HTML or markdown to it (highlighting for example).
     * - The view is responsible of shortening the text if it is too big
     *
     * @param  string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid')));
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Thu Aug 11 10:00:09 2016 | Cross-referenced by PHPXref 0.7.1 |