当前位置:网站首页>Tencent cloud ASR product PHP realizes real-time voice authentication request

Tencent Cloud ASR: authenticating and sending real-time speech recognition requests in PHP

2022-06-24 08:32:00 Yuanlunqiao

1. Preparation

(1) Activate a Tencent Cloud account: https://cloud.tencent.com/

(2) Enable the real-time speech recognition service in the Tencent Cloud console: https://console.cloud.tencent.com/asr

(3) Create the API secret key (SecretId / SecretKey) in the console: https://console.cloud.tencent.com/cam/capi

Item

Description

Supported languages

Mandarin Chinese, English, Cantonese, Korean, Japanese, Thai, Shanghai dialect

Supported industries

General, finance, gaming, education, medical care

Audio properties

Sampling rate: 16000 Hz or 8000 Hz; sampling precision: 16 bits; channels: mono

Audio format

wav, pcm, opus, speex, silk, mp3, m4a, aac

Data length

The recommended size of each packet in the audio stream is 200 ms of audio: at an 8 kHz sampling rate that is a slice of 3200 bytes, and at a 16 kHz sampling rate a slice of 6400 bytes.

2. Code (HTTP, WebSocket)

The HTTP protocol code is as follows (https://cloud.tencent.com/document/product/1093/35799). Note that the official HTTP real-time speech recognition protocol has since been taken offline:

<?php
// Real time voice function 

/**
 * Demo client for the HTTP (v1) real-time speech recognition endpoint.
 *
 * voice() loads an audio file, cuts it into slices of self::$CUTLENGTH bytes and
 * POSTs every slice as its own signed request. Each request carries a running
 * "seq" number and the final slice is flagged with "end" = 1. Every response
 * body is echoed followed by a newline.
 */
class voiceTest{
    // -------------- Required: obtain these values from the Tencent Cloud console ---------------------
    const APPID = "appid";              // must be configured
    const SECRET_ID = " Secret key id"; // must be configured
    const SECRET_KEY = " Secret key key"; // must be configured

    const AGREEMENT             = "https";
    const VOICE_URL             = "asr.cloud.tencent.com/asr/v1/";
    const HTTPRequestMethod     = "POST";
    // --------------- Optional: modify as needed ---------------------
    /** Engine model type
        Telephone scenario:
        • 8k_zh: telephone 8k, general Mandarin Chinese;
        • 8k_zh_finance: telephone 8k, financial domain model;
        Non-telephone scenario:
        • 16k_zh: 16k general Mandarin Chinese;
        • 16k_en: 16k English;
        • 16k_ca: 16k Cantonese;
        • 16k_ko: 16k Korean;
        • 16k_zh-TW: 16k Traditional Chinese;
        • 16k_ja: 16k Japanese.
     **/
    public static $ENGINE_MODEL_TYPE = '16k_zh';

    // How results are returned. 0: synchronously, including all intermediate results; 1: only with the tail packet.
    public static $RES_TYPE = 1;
    // 1: real-time streaming recognition.
    public static $SUB_SERVICE_TYPE = 1;

    // Encoding of the recognized text. 0: UTF-8, 1: GB2312, 2: GBK, 3: BIG5.
    public static $RESULT_TEXT_FORMAT = 0;

    // Audio encoding sent as "voice_format". Values listed by the original author: 1: wav, 4: sp, 6: silk.
    // NOTE(review): the default 8 is not in that list although the sample call uses a .wav file -
    // confirm the intended value against the API documentation.
    public static $VOICE_FORMAT = 8;

    // Hot word list id; sent as "hotword_id" only when non-empty.
    public static $HOT_WORD_ID = "";

    // If the total duration of the audio stream exceeds 60 seconds, vad must be turned on. 0: off, 1: on.
    public static $NEEDVAD = 0;

    // Sentence-break silence threshold in ms (mostly used for intelligent customer service scenarios).
    // Only sent when needvad = 1, the value is within 150-2000 and the engine model is 8k_zh.
    public static $VAD_SILENCE_TIME = 2000;

    // Default 0.
    public static $SOURCE = 0;
    // Post-processing parameters.
    public static $FILTER_DIRTY = 0;
    public static $FILTER_MODAL = 0;
    public static $FILTER_PUNC  = 0;
    public static $CONVERT_NUM_MODE = 0;
    public static $WORD_INFO = 0;

    // Slice length in bytes; must stay below 200000.
    public static $CUTLENGTH = 60000;


    /**
     * Sends the given audio file to the recognition endpoint slice by slice
     * and echoes every response body.
     *
     * @param string $pathFile Path of a local, readable audio file.
     * @return void
     * @throws InvalidArgumentException When self::$CUTLENGTH is not a positive number.
     * @throws RuntimeException When the file cannot be read or a request fails at transport level.
     */
    public static function voice($pathFile){
        $voiceData   = self::readAudioFile($pathFile);
        $sliceLength = (int) self::$CUTLENGTH;
        if ($sliceLength < 1) {
            throw new InvalidArgumentException("CUTLENGTH must be a positive number of bytes");
        }

        $timestamp = time();

        // Query-string parameters. The appid is part of the URL path, never of the
        // query string, so it is not included here.
        $httpUrlParams  =
        [
            "projectid"     => 0,
            "secretid"      => self::SECRET_ID,
            "sub_service_type"      => self::$SUB_SERVICE_TYPE,
            "engine_model_type"     => self::$ENGINE_MODEL_TYPE,
            "result_text_format"    => self::$RESULT_TEXT_FORMAT,
            "res_type"      => self::$RES_TYPE,
            "voice_format"  => self::$VOICE_FORMAT,
            "needvad"       => self::$NEEDVAD,
            "source"        => self::$SOURCE,
            // 16-character string generated by the caller; identifies this audio stream.
            "voice_id"      => self::getRandomString(16),
            "timestamp"     => $timestamp,
            "expired"       => $timestamp + 24 * 60 * 60,
            "nonce"         => rand(1, 100000), // random positive integer
            "filter_dirty"  => self::$FILTER_DIRTY,
            "filter_modal"  => self::$FILTER_MODAL,
            "filter_punc"   => self::$FILTER_PUNC,
            "convert_num_mode"  => self::$CONVERT_NUM_MODE,
            "word_info"         => self::$WORD_INFO,
        ];

        // Hot words are optional.
        if (self::$HOT_WORD_ID != "")
        {
            $httpUrlParams["hotword_id"] = self::$HOT_WORD_ID;
        }
        // The silence threshold is only sent together with needvad = 1 on the 8k_zh engine.
        if (self::$VAD_SILENCE_TIME >= 150
            && self::$VAD_SILENCE_TIME <= 2000
            && $httpUrlParams["needvad"] == 1
            && $httpUrlParams["engine_model_type"] == "8k_zh")
        {
            $httpUrlParams["vad_silence_time"] = self::$VAD_SILENCE_TIME;
        }

        $sliceCount = (int) ceil(strlen($voiceData) / $sliceLength);
        $endpoint   = self::AGREEMENT."://".self::VOICE_URL.self::APPID."?";

        for ($seq = 0; $seq < $sliceCount; $seq++) {
            $httpUrlParams["seq"] = $seq;
            // Only the final slice carries end = 1.
            $httpUrlParams["end"] = ($seq === $sliceCount - 1) ? 1 : 0;

            $requestUrl = $endpoint . http_build_query($httpUrlParams);
            // The signature covers seq/end as well, so it is recomputed per slice.
            $sign = self::getAuthorizationString($httpUrlParams);

            // Slice the buffer that is already in memory instead of re-reading the file.
            $sectionData = substr($voiceData, $seq * $sliceLength, $sliceLength);
            $headers = [
                'Authorization: ' . $sign,
                'Content-Length: ' . strlen($sectionData),
            ];
            $result = self::get_curl_request($requestUrl, $sectionData, 'POST', $headers);
            echo $result , "\n";
        }
    }

    /**
     * Loads the complete audio file into memory.
     *
     * @param string $pathFile Path of a local audio file.
     * @return string Raw file contents.
     * @throws RuntimeException When the path is empty, not a readable file, or reading fails.
     */
    private static function readAudioFile($pathFile){
        if (!is_string($pathFile) || $pathFile === '' || !is_file($pathFile) || !is_readable($pathFile)) {
            throw new RuntimeException("Audio file is missing or not readable: " . var_export($pathFile, true));
        }
        $data = file_get_contents($pathFile);
        if ($data === false) {
            throw new RuntimeException("Failed to read audio file: " . $pathFile);
        }
        return $data;
    }


    /**
     * Sends a request with cURL.
     *
     * @param string $url
     * @param array|string $param POST body, or query parameters for non-POST requests
     * @param string $mothod 'POST' (case-insensitive) sends $param as body; anything else appends it to the URL
     * @param array $headers Raw header lines
     * @param int $return_status When 1, return array(body, curl_getinfo()) instead of the body only
     * @param int $flag When truthy, TLS peer verification is switched off; it stays on by default
     * @return array|string
     * @throws RuntimeException When the transfer fails at transport level.
     */
    static private function get_curl_request($url, $param, $mothod = 'POST', $headers = [], $return_status = 0, $flag = 0)
    {
        $ch = curl_init();
        if ($flag) {
            // Only skip certificate verification when the caller explicitly asks for it.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }

        curl_setopt($ch, CURLOPT_TIMEOUT, 6);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        if (strtolower($mothod) == 'post') {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $param);
        } elseif (!empty($param)) {
            // http_build_query() only accepts arrays/objects; a string is used as a ready-made query.
            $query = is_array($param) ? http_build_query($param) : (string) $param;
            $url  .= (strpos($url, '?') === false ? '?' : '&') . $query;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $ret = curl_exec($ch);
        if ($ret === false) {
            // Read the error text before the handle is released.
            $error = curl_error($ch);
            curl_close($ch);
            throw new RuntimeException("HTTP request failed: " . $error);
        }
        $info = curl_getinfo($ch);
        curl_close($ch);

        if ((int) $return_status === 1) {
            return array($ret, $info);
        }
        return $ret;
    }

    /**
     * Generates a random string.
     *
     * @param int $len Number of characters to produce
     * @param bool $special Whether punctuation characters may be used in addition to letters and digits
     * @return string
     */
    private static function getRandomString($len, $special=false){
        $alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        if ($special) {
            $alphabet .= '!@#$?|{/:;%^&*()-_[]}<>~+=,.';
        }

        $maxIndex = strlen($alphabet) - 1;
        $str = '';
        for ($i = 0; $i < $len; $i++) {
            // Each position is drawn independently, so characters may repeat.
            $str .= $alphabet[mt_rand(0, $maxIndex)];
        }
        return $str;
    }

    /**
     * Creates the request signature.
     *
     * The signed text is the request method, the endpoint (without scheme), the
     * appid, "?" and the query parameters sorted by key; it is signed with
     * HMAC-SHA1 using SECRET_KEY and then base64-encoded.
     *
     * @param array $params Query parameters of the request
     * @return string
     */
    private static function getAuthorizationString($params){
        $signString = self::HTTPRequestMethod.self::VOICE_URL.self::APPID."?";
        // Keys are sorted as strings before being serialized.
        ksort($params, SORT_STRING);
        // The appid is already part of the path and must not be signed as a parameter.
        unset($params["appid"]);
        $signString .= http_build_query($params);
        return base64_encode(hash_hmac('SHA1', $signString, self::SECRET_KEY, true));
    }
}

// Run the HTTP demo against a sample audio file (the path is relative to the
// current working directory). Change the static properties of voiceTest before
// this call if other request settings are needed.
voiceTest::voice("./test_wav/16k/16k_30s.wav");

The WebSocket protocol code is as follows (https://cloud.tencent.com/document/product/1093/48982):

<?php
// Step 1: install the WebSocket client package (https://github.com/Textalk/websocket-php):
//   composer require textalk/websocket
// Step 2: load Composer's autoloader so the package's classes can be resolved.
require_once "vendor/autoload.php";
//PHP  About real-time speech recognition request authentication 
/**
 * Demo client for the WebSocket (v2) real-time speech recognition endpoint.
 *
 * ws() builds a signed wss:// URL, streams an audio file to it in binary frames
 * of self::$CUTLENGTH bytes, echoes the message received after every frame and
 * finally sends the {"type": "end"} text frame.
 */
class wsVoice{
    const APPID = "appid";              // must be configured
    const SECRET_ID = " Secret key id"; // must be configured
    const SECRET_KEY = " Secret key key"; // must be configured

    const AGREEMENT             = "wss://";
    const VOICE_URL             = "asr.cloud.tencent.com/asr/v2/";

    /** Engine model type
    Telephone scenario:
    • 8k_zh: telephone 8k, general Mandarin Chinese;
    • 8k_zh_finance: telephone 8k, financial domain model;
    Non-telephone scenario:
    • 16k_zh: 16k general Mandarin Chinese;
    • 16k_en: 16k English;
    • 16k_ca: 16k Cantonese;
    • 16k_ko: 16k Korean;
    • 16k_zh-TW: 16k Traditional Chinese;
    • 16k_ja: 16k Japanese.
     **/
    public static $ENGINE_MODEL_TYPE = '16k_zh';

    // Audio encoding sent as "voice_format". Values listed by the original author: 1: wav, 4: sp, 6: silk.
    public static $VOICE_FORMAT = 1;

    // Hot word list id; sent as "hotword_id" only when non-empty.
    public static $HOT_WORD_ID = "";

    // If the total duration of the audio stream exceeds 60 seconds, vad must be turned on. 0: off, 1: on.
    public static $NEEDVAD = 0;

    // Sentence-break silence threshold in ms (mostly used for intelligent customer service scenarios).
    // Only sent when needvad = 1, the value is within 240-2000 and the engine model is one of
    // 8k_zh, 8k_zh_finance or 16k_zh.
    public static $VAD_SILENCE_TIME = 2000;

    // Post-processing parameters.
    public static $FILTER_DIRTY = 0;
    public static $FILTER_MODAL = 0;
    public static $FILTER_PUNC  = 0;

    // Slice length in bytes; must stay below 200000.
    public static $CUTLENGTH = 6000;


    /**
     * Streams the given audio file to the recognition service and echoes the
     * signature, the request URL and every received message.
     *
     * @param string $pathFile Path of a local, readable audio file.
     * @return void
     * @throws InvalidArgumentException When self::$CUTLENGTH is not a positive number.
     * @throws RuntimeException When the file is missing or cannot be read.
     */
    public function ws($pathFile = ""){
        // Validate the input before anything is printed or any connection is prepared.
        $voiceData   = self::readAudioFile($pathFile);
        $sliceLength = (int) self::$CUTLENGTH;
        if ($sliceLength < 1) {
            throw new InvalidArgumentException("CUTLENGTH must be a positive number of bytes");
        }

        $timestamp      = time();
        $httpUrlParams  =
            [
                "secretid"      => self::SECRET_ID,
                "timestamp"     => $timestamp,
                "expired"       => $timestamp + 24 * 60 * 60,
                "nonce"         => rand(1, 100000), // random positive integer
                "engine_model_type"     => self::$ENGINE_MODEL_TYPE,
                // 16-character string generated by the caller; identifies this audio stream.
                "voice_id"      => self::getRandomString(16),
                "voice_format"  => self::$VOICE_FORMAT,
                "needvad"       => self::$NEEDVAD,
                "filter_dirty"  => self::$FILTER_DIRTY,
                "filter_modal"  => self::$FILTER_MODAL,
                "filter_punc"   => self::$FILTER_PUNC,
                "convert_num_mode" => 1,
                "word_info" => 0,
                // Added below when applicable: "hotword_id", "vad_silence_time", "signature".
            ];

        // Hot words are optional.
        if (self::$HOT_WORD_ID != "")
        {
            $httpUrlParams["hotword_id"] = self::$HOT_WORD_ID;
        }

        // The silence threshold is only sent together with needvad = 1 on the engines listed here.
        if (self::$VAD_SILENCE_TIME >= 240
            && self::$VAD_SILENCE_TIME <= 2000
            && $httpUrlParams["needvad"] == 1
            && in_array($httpUrlParams["engine_model_type"], ["8k_zh", "8k_zh_finance", "16k_zh"], true))
        {
            $httpUrlParams["vad_silence_time"] = self::$VAD_SILENCE_TIME;
        }

        // The signature is computed over all parameters except itself.
        $signature =  self::getAuthorizationString($httpUrlParams);
        echo $signature." \n";

        // http_build_query() URL-encodes the base64 signature together with the other values.
        $httpUrlParams["signature"] = $signature;
        $requestUrl = self::AGREEMENT.self::VOICE_URL.self::APPID."?";
        $requestUrl .= http_build_query($httpUrlParams);
        echo $requestUrl. " \n";

        $sliceCount = (int) ceil(strlen($voiceData) / $sliceLength);

        $client = new WebSocket\Client($requestUrl);

        echo " The total number of times ".$sliceCount. " \n";
        for ($seq = 0; $seq < $sliceCount; $seq++) {
            // Slice the buffer that is already in memory instead of re-reading the file.
            $client->binary(substr($voiceData, $seq * $sliceLength, $sliceLength));
            // NOTE(review): this waits for one message per frame sent; presumably the
            // service answers often enough for that - confirm against the protocol docs.
            echo $client->receive();
            echo "\n";
        }
        echo " end  \n";
        // Tell the service that no more audio follows, then print its reply.
        $client->text('{"type": "end"}');
        echo $client->receive();
        $client->close();
    }

    /**
     * Loads the complete audio file into memory.
     *
     * @param string $pathFile Path of a local audio file.
     * @return string Raw file contents.
     * @throws RuntimeException When the path is empty, not a readable file, or reading fails.
     */
    private static function readAudioFile($pathFile){
        if (!is_string($pathFile) || $pathFile === '' || !is_file($pathFile) || !is_readable($pathFile)) {
            throw new RuntimeException("Audio file is missing or not readable: " . var_export($pathFile, true));
        }
        $data = file_get_contents($pathFile);
        if ($data === false) {
            throw new RuntimeException("Failed to read audio file: " . $pathFile);
        }
        return $data;
    }


    /**
     * Creates the request signature.
     *
     * The signed text is the endpoint (without scheme), the appid, "?" and the
     * query parameters sorted by key; it is signed with HMAC-SHA1 using
     * SECRET_KEY and then base64-encoded.
     *
     * @param array $params Query parameters of the request (without "signature")
     * @return string
     */
    private static function getAuthorizationString($params){
        $signString = self::VOICE_URL.self::APPID."?";
        // Keys are sorted as strings before being serialized.
        ksort($params, SORT_STRING);
        $signString .= http_build_query($params);
        return base64_encode(hash_hmac('SHA1', $signString, self::SECRET_KEY, true));
    }

    /**
     * Generates a random string.
     *
     * @param int $len Number of characters to produce
     * @param bool $special Whether punctuation characters may be used in addition to letters and digits
     * @return string
     */
    private static function getRandomString($len, $special=false){
        $alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        if ($special) {
            $alphabet .= '!@#$?|{/:;%^&*()-_[]}<>~+=,.';
        }

        $maxIndex = strlen($alphabet) - 1;
        $str = '';
        for ($i = 0; $i < $len; $i++) {
            // Each position is drawn independently, so characters may repeat.
            $str .= $alphabet[mt_rand(0, $maxIndex)];
        }
        return $str;
    }

}

// Run the WebSocket demo: stream ./test.pcm (relative to the current working
// directory) to the recognition service.
$model = new wsVoice();
$model->ws("./test.pcm");
原网站

版权声明
本文为[Yuanlunqiao]所创,转载请带上原文链接,感谢
https://yzsam.com/2021/06/20210624131107594W.html