Function to extract keywords from a string of text

  /**
   * Finds the keywords (the words that appear most often) in $str and returns
   * them ordered from most occurrences to fewest. Words with the same number of
   * occurrences keep the order in which they first appear in $str.
   *
   * Every character that is not a letter, an ASCII digit or a space is treated
   * as a word separator, and words are compared case-insensitively (the returned
   * keywords are lower-cased).
   *
   * The text is processed as UTF-8. If $str is not valid UTF-8 the function
   * falls back to byte-wise matching so single-byte encoded input still works.
   * Word length is counted in characters when $str is valid UTF-8 and the
   * mbstring extension is loaded, and in bytes otherwise.
   *
   * @param string $str The string to search for the keywords.
   * @param int $minWordLen[optional] The minimum length of a word to be considered a keyword.
   * @param int $minWordOccurrences[optional] The minimum number of times a word has to appear
   * in $str to be considered a keyword.
   * @param boolean $asArray[optional] Specifies if the function returns a string with the
   * keywords separated by ", " ($asArray = false) or a keywords array ($asArray = true).
   * @return mixed An array with the keywords if $asArray is true,
   * a string with the keywords separated by ", " otherwise.
   */
  function extract_keywords($str, $minWordLen = 3, $minWordOccurrences = 2, $asArray = false)
  {
      $str = (string) $str;

      // With the "u" modifier \p{L} matches whole UTF-8 characters instead of
      // single bytes. preg_replace() returns null when the subject is not valid
      // UTF-8; in that case keep the historical byte-wise behaviour.
      $isUtf8 = true;
      $clean = preg_replace('/[^\p{L}0-9 ]/u', ' ', $str);
      if ($clean === null) {
          $isUtf8 = false;
          $clean = (string) preg_replace('/[^\p{L}0-9 ]/', ' ', $str);
      }

      // Only letters, digits and spaces are left, so splitting on runs of
      // spaces (dropping empty pieces) never produces an empty "word".
      $words = preg_split('/ +/', $clean, -1, PREG_SPLIT_NO_EMPTY);
      if ($words === false) {
          $words = array();
      }

      $multibyte = $isUtf8 && function_exists('mb_strtolower') && function_exists('mb_strlen');

      // word => array(word, occurrences, position of first appearance).
      // The word is stored in the value as well because PHP converts numeric
      // string keys such as "123" to integers.
      $keywords = array();
      foreach ($words as $word) {
          $length = $multibyte ? mb_strlen($word, 'UTF-8') : strlen($word);
          if ($length < $minWordLen) {
              continue;
          }

          $word = $multibyte ? mb_strtolower($word, 'UTF-8') : strtolower($word);
          if (array_key_exists($word, $keywords)) {
              $keywords[$word][1]++;
          } else {
              $keywords[$word] = array($word, 1, count($keywords));
          }
      }

      // An anonymous comparator is used instead of a nested named function:
      // a named function declared here would be redeclared (fatal error) on
      // the second call. Ties are broken by first appearance so the result
      // does not depend on whether usort() is stable.
      usort($keywords, function ($first, $sec) {
          if ($first[1] !== $sec[1]) {
              return $sec[1] - $first[1];
          }
          return $first[2] - $sec[2];
      });

      $final_keywords = array();
      foreach ($keywords as $keyword_det) {
          // Sorted by descending count: once one word is below the threshold
          // all remaining ones are too.
          if ($keyword_det[1] < $minWordOccurrences) {
              break;
          }
          $final_keywords[] = $keyword_det[0];
      }

      return $asArray ? $final_keywords : implode(', ', $final_keywords);
  }

連絡先: info@paste.jp
Created by Paste.jp - v7.0