Semanta corpora and discussion partners.

ENTRY ".$entry." INVR DOMN ".$invr_domn; if ($entry != "." && $entry != "..") { $line_cntr=0; // print_r($corp_rslt); $file_array[$e]= $entry; if ($entry[0] == $invr_cont and $invr_domn == null) { $file_array[$e]= $entry; // echo "
ENTRY ".$entry; } if ($invr_domn == $entry) { $file_array[$e]= $entry; // echo "
ENTRY ".$entry; } $e=$e+1; } } } } // echo "
// ---------------------------------------------------------------------------
// Main listing pass.
//
// For every entry in $file_array that equals $invr_corp or $invr_bzkr, the
// rules returned by xtrt_xxxx() are walked. Each rule key is a "!!"-separated
// record. For rules whose fourth field equals the entry name, the first lines
// of ./data/<corpus>/lnk00000.dat are read to find a "the_title:" line, and
// the counters $domn_array, $corp_array, $dtls_rslt and $chbt_array are
// updated. Afterwards the collected titles are printed, followed by the
// heading that introduces the topic table.
//
// NOTE(review): this block was reconstructed from a copy of the file in which
// the HTML inside the echo strings and the original line breaks were lost.
// The empty strings ("") and the line breaks inside string literals are kept
// exactly as found. A "//" is taken to disable only the statement directly
// following it, so print_r($chbt_rslt) and the RULE KEY / file-name echoes are
// treated as live output - confirm against the original file.
// ---------------------------------------------------------------------------

// Fall back to Dutch when no language was requested.
if ($invr_taal == null) {
    $invr_taal = "nederlands";
}

// Make sure the accumulators that are sorted/iterated below exist even when no
// rule matched; asort()/ksort() on an undefined variable is fatal on PHP 8.
if (!isset($dtls_rslt) || !is_array($dtls_rslt)) {
    $dtls_rslt = array();
}
if (!isset($chbt_array) || !is_array($chbt_array)) {
    $chbt_array = array();
}
// "Previous value" markers used for de-duplication; define them before their
// first comparison instead of comparing against an undefined variable.
if (!isset($srelt)) {
    $srelt = null;
}
if (!isset($drelt)) {
    $drelt = null;
}

echo "";
foreach ($file_array as $file_key => $file_val) {
    // Only the requested corpus (or the visitor's own entry) is processed.
    if (!($invr_corp == $file_val || $invr_bzkr == $file_val)) {
        continue;
    }

    $chbt_rslt = xtrt_xxxx($file_val, $invr_taal, "base");
    print_r($chbt_rslt);

    if (count($chbt_rslt) > 0) {
        foreach ($chbt_rslt as $rule_key => $rule_val) {
            $rule_itms = explode("!!", $rule_key);
            echo "
RULE KEY ". $rule_key." RULE VAL ".$rule_val;

            // Field meanings are inferred from the variable names only.
            $tref_scre = $rule_itms[0];
            $tref_taal = $rule_itms[1];
            $tref_chbt = $rule_itms[2];
            $tref_domn = $rule_itms[3];
            // Fix: assign $tref_corp BEFORE it is used in the counter key.
            // The original read it first, so the write landed under the
            // previous rule's corpus (or "@chatbot" on the first pass) while
            // the read used the current domain.
            $tref_corp = $rule_itms[3];

            $cntr_key = $tref_corp."@".$tref_chbt;
            $domn_array[$cntr_key] = isset($domn_array[$cntr_key]) ? $domn_array[$cntr_key] + 1 : 1;

            if ($tref_corp != $file_val) {
                continue;
            }

            $corp_array[$tref_corp] = isset($corp_array[$tref_corp]) ? $corp_array[$tref_corp] + 1 : 1;
            $tref_datm = $rule_itms[5];
            $tref_levl = $rule_itms[6];
            $tref_ipad = $rule_itms[7];

            $file_name = './data/'.$tref_corp."/lnk00000.dat";
            echo "
".$file_name;

            if (file_exists($file_name)) {
                $sw_corp = 1;
                $f = fopen($file_name, "r");
                $word_count = 0;
                $line_count = 0;
                $nl = 0;

                // Fix: skip the read loop when fopen() failed, test the line
                // limit before reading (the original fetched a fourth line it
                // never used), and compare fgets() against false so a line
                // consisting of "0" does not end the loop early.
                if ($f !== false) {
                    while ($nl < 3 && ($line = fgets($f, 100000)) !== false) {
                        $nl = $nl + 1;
                        $str_repl_line = $line;
                        // NOTE(review): require_once executes the include only
                        // the first time it is reached. If ustr-repl-nl.php
                        // rewrites $str_repl_line inline, only the very first
                        // line read in the whole request is transformed -
                        // confirm what the include does before changing this.
                        require_once("ustr-repl-nl.php");
                        $str_repl_line = trim($str_repl_line);
                        $str_repl_line = strtolower($str_repl_line);
                        $lstr_repl_line = str_replace("the_title:", "", $str_repl_line);

                        // Only a title line that differs from the previously
                        // handled one is counted.
                        if (substr($str_repl_line, 0, 10) == "the_title:"
                            && $str_repl_line != null
                            && $srelt != $str_repl_line) {
                            $bzv1_dtls = explode(" ", $str_repl_line);
                            foreach ($bzv1_dtls as $dtls_key => $dtls_val) {
                                // One increment per word longer than 3 characters.
                                if (strlen($dtls_val) > 3) {
                                    $sstr_repl_line = str_replace(" ", "+", $lstr_repl_line);
                                    $ltref_domn = "".$tref_corp."";
                                    $cntr_key = $lstr_repl_line."!!".$ltref_domn;
                                    $dtls_rslt[$cntr_key] = isset($dtls_rslt[$cntr_key]) ? $dtls_rslt[$cntr_key] + 1 : 1;
                                }
                            }
                            $invr_bzv1 = substr($str_repl_line, 10, 256);
                            $sinvr_bzv1 = str_replace(" ", "+", $invr_bzv1);
                            $ltref_domn = "".$tref_corp."";
                            $srelt = $str_repl_line;
                        }
                    }
                    // Fix: the original never released the file handle.
                    fclose($f);
                }

                // NOTE(review): $perc_val is not assigned anywhere in the
                // visible part of the file - verify it is set earlier.
                $corp_array[$perc_val] = isset($corp_array[$perc_val]) ? $corp_array[$perc_val] + 1 : 1;
            }

            // NOTE(review): $str_repl_line holds the LAST line read above (or a
            // value left over from an earlier corpus when the file is missing),
            // which is not necessarily the "the_title:" line - confirm intent.
            $cntr_key = $tref_corp."@".$tref_chbt."@".$tref_taal."@".$tref_domn."@".$tref_ipad."@".$str_repl_line;
            $chbt_array[$cntr_key] = isset($chbt_array[$cntr_key]) ? $chbt_array[$cntr_key] + 1 : 1;
            echo "";
        }
    }
}

// Order the collected titles by their counter value (ascending, keys kept).
asort($dtls_rslt);
echo "
".$tref_taal."".$ltref_domn."".substr($str_repl_line,10,256)."".$tref_chbt."".$tref_corp."
".substr($str_repl_line,10,256)."
".substr($str_repl_line,10,256)."".$tref_chbt."".$tref_corp."
";

// Print each title; a new group is started whenever the second key part
// changes, otherwise the title is appended to the current group.
// NOTE(review): asort() orders by counter, not by that key part, so the same
// group can be started more than once - confirm this is acceptable.
foreach ($dtls_rslt as $rslt_key => $rslt_val) {
    echo "";
    $rslt_rslt = explode("!!", $rslt_key);
    if ($drelt != $rslt_rslt[1]) {
        echo "
".$rslt_rslt[1]."".$rslt_rslt[0];
        $drelt = $rslt_rslt[1];
    } else {
        echo ", ".$rslt_rslt[0];
    }
}
echo "
";
echo "

Voor Semanta zijn de volgende onderwerpen geidentificeerd door Lingvistica:

";
ksort($chbt_array);
// The disabled debug echo opened on the next line continues on the following line of the file.
// echo "
Aantal corpora= ".count($corp_array); // echo ""; foreach($chbt_array as $chbt_key => $chbt_val) { $domn_dtls=explode("@",$chbt_key); $domn_chbt=$domn_dtls[1]; $domn_corp=$domn_dtls[0]; $domn_taal=$domn_dtls[2]; $domn_domn=$domn_dtls[3]; $domn_srvr=$domn_dtls[4]; $domn_titl=$domn_dtls[5]; // echo "
".$domn_srvr; $domn_tabl=str_replace("@","".$domn_tabl.""; // echo "
FRST LETR ".$frst_letr; if ($frelt !=$chbt_key[0]) { $frst_letr="-".strtoupper($chbt_key[0])."-"; // echo "
".$frst_letr; // $chbt_key[0]=$frst_letr; $ltref_domn="".$frst_letr."-".$chbt_key.""; $frelt=$chbt_key[0]; // echo ""; } $ltref_domn="".$chbt_key.""; // echo ""; } echo "
",$chbt_key); // echo "
".$domn_titl."".$ltref_domn.">/td>
".$domn_table."".$ltref_domn."
"; ?>