この日記はGNSで生成しています。 |
_ 最近検索エンジンからのrefererがかなり増えて、キーワードを確認するためだけにwwwを見に行くのがもうイヤになったので、検索くんキーワード抽出プログラムを書く。どんなパターンが来るかさえ調べてしまえば、簡単だ。
require 'jcode.pl';

# $searchkey = &getsearchkey( REFERER_URI );
#
# Extract the search keywords from a search-engine referer URI.
#
# Every known query parameter found in REFERER_URI contributes its value;
# the values are joined with single spaces in the fixed order listed below.
# The joined string is URL-decoded ("+" becomes a space, %XX becomes the
# raw byte) and finally handed to jcode.pl so that the result comes back
# converted to 'jis'.  An empty string is returned when no known parameter
# is present.
#
# Notes for maintainers:
#   * The caller's $_ is left untouched; matching is done on $referer.
#   * Variables are declared with local() rather than my() on purpose:
#     they are passed to jcode.pl as typeglobs (*found), which can only
#     reach package variables.
sub getsearchkey {
    local( $referer ) = shift;
    local( $found );
    local( $found_sjis );
    local( $found_sjis2 );
    local( $name );
    local( @words );

    # Stand-alone parameters, checked (and reported) in this order.
    # Only the first occurrence of each parameter is used.
    foreach $name (
        'MT',       # www.hotbot.com, www.goo.ne.jp
        'p',        # ink.yahoo.com, search.yahoo.co.jp
        'qt',       # www.infoseek.co.jp
        'query',    # www.lycos.co.jp
        'key',      # odin.ingrid.org
        's',        # www.excite.co.jp
        'search',   # www.excite.co.jp
        'q'         # www.altavista.com
    ) {
        # No /o here: the pattern changes on every iteration.
        if ( $referer =~ /[?&]${name}=([^&]+)/ ) { push( @words, $1 ); }
    }

    # www.excite.co.jp: FI_2 .. FI_6 are only looked at when FI_1 exists.
    if ( $referer =~ /[?&]FI_1=([^&]+)/ ) {
        push( @words, $1 );
        foreach $name ( 'FI_2', 'FI_3', 'FI_4', 'FI_5', 'FI_6' ) {
            if ( $referer =~ /[?&]${name}=([^&]+)/ ) { push( @words, $1 ); }
        }
    }

    # www.hotbot.com: AW1 .. AW5 are only looked at when AW0 exists.
    if ( $referer =~ /[?&]AW0=([^&]+)/ ) {
        push( @words, $1 );
        foreach $name ( 'AW1', 'AW2', 'AW3', 'AW4', 'AW5' ) {
            if ( $referer =~ /[?&]${name}=([^&]+)/ ) { push( @words, $1 ); }
        }
    }

    $found = join( ' ', @words );

    # URL-decode.  "+" must be turned into a space BEFORE the %XX pass so
    # that an encoded plus sign (%2B) survives as a literal "+".
    $found =~ tr/+/ /;
    $found =~ s/%([0-9a-fA-F][0-9a-fA-F])/pack("C", hex($1))/eg;

    # Character-code handling (statement order matters, do not reorder).
    # NOTE(review): the description below assumes jcode.pl's documented
    # interface convert(*str, OUTPUT_CODE [, INPUT_CODE]) with the input
    # code auto-detected when omitted -- confirm against the installed
    # jcode.pl.
    #
    #   1. Treat the bytes as 'sjis' and convert them to 'jis'.
    #   2. Convert that result back to 'sjis'.
    #   3. If the round trip reproduces the original bytes, use the 'jis'
    #      string from step 1; otherwise convert the original to 'jis'
    #      without naming an input code.
    $found_sjis = $found;
    &jcode'convert( *found_sjis, 'jis', 'sjis' );
    $found_sjis2 = $found_sjis;
    &jcode'convert( *found_sjis, 'sjis' );
    if ( $found eq $found_sjis ) {
        $found = $found_sjis2;
    } else {
        &jcode'convert( *found, 'jis' );
    }

    return ( $found );
}
メールはこちらへ...[後藤浩昭 / Hiroaki GOTO / GORRY / gorry@hauN.org]