谷歌翻译TTS API被阻止

时间:2015-08-17 14:49:39

标签: google-text-to-speech

Google实施了验证码以阻止人们访问TTS翻译API https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW。我在我的移动应用程序中使用它。现在,它没有返回任何东西。我如何绕过验证码?

5 个答案:

答案 0 :(得分:23)

在查询字符串末尾添加参数'&client=tw-ob'。 https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW&client=tw-ob

这个答案不再始终有效。如果你过度滥用该接口,你的IP地址将被谷歌暂时封禁。

答案 1 :(得分:9)

有三个主要问题:

  1. 您必须在查询字符串中包含“client”参数(client=t似乎可以正常工作)。
  2. (如果您尝试使用AJAX检索它),HTTP请求的Referer必须是 https://translate.google.com/
  3. 每个查询的“tk”字段都会更改,并且必须填入匹配的哈希值: tk = hash(q, TKK),其中q是要转换为语音的文本,TKK是加载translate.google.com后全局作用域中的一个变量(在控制台中键入'window.TKK'即可查看)。请参阅本回答底部的哈希函数(calcHash)。

总结:

    /**
     * Builds the Google Translate text-to-speech URL for a piece of text.
     *
     * @param {string} q   Text to be spoken (raw, not URL-encoded).
     * @param {string} tl  Target language code, e.g. 'it' or 'zh-TW'.
     * @param {string} tkk Current TKK value in "index.key" form, passed on to calcHash.
     * @returns {string} The complete translate_tts URL, including the "tk" token.
     */
    function generateGoogleTTSLink(q, tl, tkk) {
        // The token is computed from the raw text, not from its URL-encoded form.
        const tk = calcHash(q, tkk);
        // Percent-encode the text: an unescaped '&', '#', '+', space or non-ASCII
        // character would otherwise truncate or corrupt the query string.
        const encodedQuery = encodeURIComponent(q);
        return `https://translate.google.com/translate_tts?ie=UTF-8&total=1&idx=0&client=t&ttsspeed=1&tl=${tl}&tk=${tk}&q=${encodedQuery}&textlen=${q.length}`;
    }
    
    // Example call: text 'ciao', language 'it', and a sample TKK string.
    generateGoogleTTSLink('ciao', 'it', '410353.1336369826');
    // calcHash is defined further down, in the hash-function listing at the end of this answer.

    =>为了获得TKK,您可以打开谷歌翻译网站,然后在开发人员工具的控制台中输入“TKK”(例如:“410353.1336369826”)。

    请注意,TKK值每小时都会发生变化,因此,旧TKK可能会在某些时候被阻止,并且可能需要刷新它(尽管到目前为止看起来旧密钥可以工作很长时间)。

    如果您希望定期刷新TKK,它可以很容易地自动化,但如果您从浏览器运行代码则不行。

    您可以在此处找到完整的NodeJS实现: https://github.com/guyrotem/google-translate-server。 它暴露了一个最小的TTS API(查询,语言),并被部署到一个免费的Heroku服务器,所以你可以在线测试它。

    /**
     * Runs a number through a sequence of "shift, then combine" steps.
     *
     * Every entry of opArray is a 3-character string:
     *   [0] combine mode: '+' adds the shifted value (result masked with 0xffffffff),
     *       anything else XORs it in;
     *   [1] shift direction: '+' is an unsigned right shift (>>>), anything else a left shift (<<);
     *   [2] shift distance as a single hex digit, decoded by hexCharAsNumber.
     *
     * @param {number} num       Starting value.
     * @param {string[]} opArray Steps to apply, in order.
     * @returns {number} The value after all steps (num itself when opArray is empty).
     */
    function shiftLeftOrRightThenSumOrXor(num, opArray) {
    	let value = num;
    	opArray.forEach((step) => {
    		const combineMode = step[0];
    		const direction = step[1];
    		const distance = hexCharAsNumber(step[2]);

    		let shifted;
    		if (direction === '+') {
    			shifted = value >>> distance;
    		} else {
    			shifted = value << distance;
    		}

    		if (combineMode === '+') {
    			// The bitwise AND coerces the sum back to a signed 32-bit integer.
    			value = (value + shifted) & 0xffffffff;
    		} else {
    			value = value ^ shifted;
    		}
    	});
    	return value;
    }
    
    /**
     * Converts one hex digit character to its numeric value.
     *
     * Characters that compare >= 'a' are mapped via their char code minus 87
     * ('a' is code 97, so 'a' -> 10 ... 'f' -> 15); everything else goes
     * through Number(), which handles '0'-'9'.
     *
     * @param {string} xd A single character, expected to be in [0-9a-f].
     * @returns {number} The digit's value.
     */
    function hexCharAsNumber(xd) {
    	if (xd >= 'a') {
    		return xd.charCodeAt(0) - 87;
    	}
    	return Number(xd);
    }
    
    /**
     * Encodes a string as an array of UTF-8 byte values.
     *
     * Walks the string one UTF-16 code unit at a time:
     *   - below 0x80   -> 1 byte
     *   - below 0x800  -> 2 bytes
     *   - a high surrogate followed by a low surrogate -> one 4-byte sequence
     *   - anything else (including a lone surrogate)   -> 3 bytes
     *
     * @param {string} query Text to encode.
     * @returns {number[]} Byte values (0-255), in order; empty for an empty string.
     */
    function transformQuery(query) {
    	const bytes = [];
    	for (let i = 0; i < query.length; i++) {
    		let code = query.charCodeAt(i);
    		if (code < 0x80) {
    			bytes.push(code);						//	0xxxxxxx
    		} else if (code < 0x800) {
    			bytes.push(
    				code >> 6 | 0xC0,					//	110xxxxx
    				code & 0x3F | 0x80					//	10xxxxxx
    			);
    		} else if (
    			(code & 0xFC00) === 0xD800 &&
    			i + 1 < query.length &&
    			(query.charCodeAt(i + 1) & 0xFC00) === 0xDC00
    		) {
    			// Surrogate pair: rebuild the code point (>= 0x10000) and consume the low surrogate.
    			code = 0x10000 + ((code & 0x03FF) << 10) + (query.charCodeAt(++i) & 0x03FF);
    			bytes.push(
    				code >> 18 | 0xF0,					//	11110xxx
    				code >> 12 & 0x3F | 0x80,			//	10xxxxxx
    				code >> 6 & 0x3F | 0x80,			//	10xxxxxx (this byte was previously dropped)
    				code & 0x3F | 0x80					//	10xxxxxx
    			);
    		} else {
    			bytes.push(
    				code >> 12 | 0xE0,					//	1110xxxx
    				code >> 6 & 0x3F | 0x80,			//	10xxxxxx
    				code & 0x3F | 0x80					//	10xxxxxx
    			);
    		}
    	}
    	return bytes;
    }
    
    /**
     * Reduces a hash value to its last six decimal digits.
     *
     * A negative input is first mapped to a non-negative value by keeping its
     * low 31 bits and adding 0x80000000; the result is then taken modulo 1,000,000.
     *
     * @param {number} encondindRound2 Hash value, possibly negative.
     * @returns {number} The value modulo 1E6.
     */
    function normalizeHash(encondindRound2) {
    	const nonNegative = (encondindRound2 < 0)
    		? (encondindRound2 & 0x7fffffff) + 0x80000000
    		: encondindRound2;
    	return nonNegative % 1E6;
    }
    
    /**
     * Computes the "tk" token for a query from the TKK value.
     *
     * @param {string} query     Text the token is computed for.
     * @param {string} windowTkk TKK string in "index.key" form; a part that is
     *                           missing or not numeric is treated as 0.
     * @returns {string} Token in the form "<hash>.<hash XOR index>".
     */
    function calcHash(query, windowTkk) {
    	// Step 1: turn the query into a byte array.
    	const queryBytes = transformQuery(query);

    	// Step 2: split TKK into its numeric index and key parts.
    	const [indexPart, keyPart] = windowTkk.split('.');
    	const tkkIndex = Number(indexPart) || 0;
    	const tkkKey = Number(keyPart) || 0;

    	// Step 3: starting from the index, add each byte and mix with two shift steps.
    	let round1 = tkkIndex;
    	for (const byte of queryBytes) {
    		round1 = shiftLeftOrRightThenSumOrXor(round1 + byte, ['+-a', '^+6']);
    	}

    	// Step 4: three more shift steps, then XOR with the TKK key.
    	const round2 = shiftLeftOrRightThenSumOrXor(round1, ['+-3', '^+b', '+-f']) ^ tkkKey;

    	// Step 5: normalize and format.
    	const normalized = normalizeHash(round2);
    	return `${normalized}.${normalized ^ tkkIndex}`;
    }
    
    // Usage example: compute the token for the text 'hola' with a sample TKK string and print it.
    var tk = calcHash('hola', '409837.2120040981');
    console.log('tk=' + tk);
     // Expected output: 'tk=70528.480109'

答案 2 :(得分:3)

首先,为了避免验证码,你必须设置一个合适的用户代理,如:
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
然后,为了不被阻止,您必须为每个请求提供正确的令牌(GET参数“tk”)。在网上可以找到许多脚本,它们在经过大量逆向工程后尝试计算该令牌……但每当Google更改算法,您就会再次卡住。因此更简单的做法是:直接观察翻译页面发出的类似请求(URL中带有您的文本),从中取得令牌。您可以用phantomjs运行下面这段简单代码,并在其输出中用grep查找“tk=”,在需要时读取令牌:

"use strict";
// PhantomJS script: opens the Google Translate page for the text given on the
// command line and prints every resource request the page makes as JSON.
// Usage: phantomjs <script> <text>
// Kept ES5-only (var, function expressions) for PhantomJS compatibility.
var page = require('webpage').create();
var system = require('system');
var args = system.args;

if (args.length !== 2) {
    console.log("usage: " + args[0] + " text");
    phantom.exit(1);
} else {
    // NOTE(review): the else branch assumes phantom.exit() may not halt the
    // script immediately, so the page is never opened with a missing
    // argument. Confirm against the PhantomJS docs.

    // Forward the page's own console output.
    page.onConsoleMessage = function(msg) {
        console.log(msg);
    };

    // Dump each outgoing request.
    page.onResourceRequested = function(request) {
        console.log('Request ' + JSON.stringify(request, undefined, 4));
    };

    // Percent-encode the text so spaces, '/', '#' and non-ASCII characters
    // survive inside the URL fragment.
    page.open("https://translate.google.it/?hl=it&tab=wT#fr/it/" + encodeURIComponent(args[1]), function(status) {
        if (status === "success") {
            phantom.exit(0);
        } else {
            phantom.exit(1);
        }
    });
}

所以最后你可以用以下的方式得到你的演讲:
wget -U "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" "http://translate.google.com/translate_tts?ie=UTF-8&tl=it&tk=52269.458629&q=ciao&client=t" -O ciao.mp3
(令牌可能是基于时间的,所以这个链接可能明天不起作用)

答案 3 :(得分:1)

您也可以尝试以下格式:

  1. 以URL编码格式传递q=您所用语言的文本(在JavaScript中可以使用encodeURI()函数,在PHP中可以使用rawurlencode()函数)

  2. 传递tl =语言短名称(假设bangla = bn)

  3. 现在试试这个:

    https://translate.google.com.vn/translate_tts?ie=UTF-8&q=%E0%A6%A2%E0%A6%BE%E0%A6%95%E0%A6%BE+&tl=bn&client=tw-ob

答案 4 :(得分:1)

我用Java重写了Guy Rotem的答案,所以如果您喜欢Java而不是Javascript,请随时使用:

/**
 * Java port of the "tk" token calculation used for Google Translate TTS requests.
 *
 * Example: {@code calcHash("hola", "409837.2120040981")} returns {@code "70528.480109"}.
 */
public class Hasher {
    /**
     * Runs a number through a sequence of "shift, then combine" steps using
     * 32-bit integer arithmetic.
     *
     * Every entry of {@code opArray} is a 3-character string:
     * <ul>
     *   <li>char 0 - combine mode: '+' adds the shifted value, '^' XORs it in;</li>
     *   <li>char 1 - shift direction: '+' is an unsigned right shift, '-' a left shift;</li>
     *   <li>char 2 - shift distance as one hex digit [0-9a-f].</li>
     * </ul>
     *
     * @param num     starting value; truncated to 32 bits before each step
     * @param opArray steps to apply, in order
     * @return the value after all steps ({@code num} itself when {@code opArray} is empty)
     */
    public long shiftLeftOrRightThenSumOrXor(long num, String[] opArray) {
        long result = num;

        for (String op : opArray) {
            char direction = op.charAt(1);  //  '+' | '-' ~ shift right (unsigned) | shift left
            char combine = op.charAt(0);    //  '+' | '^' ~ SUM | XOR
            char xd = op.charAt(2);         //  [0-9a-f]

            assertError(direction == '+' || direction == '-', "Invalid OP: " + direction);
            assertError(combine == '+' || combine == '^', "Invalid OP: " + combine);
            assertError(('0' <= xd && xd <= '9') || ('a' <= xd && xd <= 'f'),
                    "Not an 0x? value: " + xd);

            int shiftAmount = hexCharAsNumber(xd);
            int acc = (int) result;
            int mask = (direction == '+') ? acc >>> shiftAmount : acc << shiftAmount;

            // int arithmetic wraps to 32 bits; the int result is sign-extended into the long.
            result = (combine == '+') ? acc + mask : acc ^ mask;
        }

        return result;
    }

    /**
     * Reports a failed check on standard error. Does not throw, so processing
     * continues after a failed check.
     *
     * @param cond condition expected to hold
     * @param e    message printed when {@code cond} is false
     */
    public void assertError(boolean cond, String e) {
        if (!cond) {
            // Previously printed an empty line and dropped the message.
            System.err.println(e);
        }
    }

    /**
     * Converts one hex digit character to its numeric value.
     *
     * @param xd a character, expected to be in [0-9a-f]
     * @return {@code xd - 87} for characters at or above 'a' ('a' -> 10), otherwise
     *         {@link Character#getNumericValue(char)}
     */
    public int hexCharAsNumber(char xd) {
        return (xd >= 'a') ? xd - 87 : Character.getNumericValue(xd);
    }

    /**
     * Encodes a string as UTF-8 byte values (one int per byte).
     *
     * A high surrogate followed by a low surrogate becomes one 4-byte sequence;
     * any other char at or above 0x800 (including a lone surrogate) becomes 3 bytes.
     *
     * @param query text to encode
     * @return the byte values in order; an empty array for an empty string
     */
    public int[] transformQuery(String query) {
        // Each UTF-16 code unit produces at most 3 bytes (a surrogate pair is two
        // units and produces 4), so this buffer can never overflow.
        int[] e = new int[query.length() * 3];
        int f = 0;

        for (int g = 0; g < query.length(); g++) {
            int l = query.charAt(g);
            if (l < 128) {
                e[f++] = l;                     //  0xxxxxxx
            } else if (l < 2048) {
                e[f++] = l >> 6 | 0xC0;         //  110xxxxx
                e[f++] = l & 0x3F | 0x80;       //  10xxxxxx
            } else if (0xD800 == (l & 0xFC00)
                    && g + 1 < query.length() && 0xDC00 == (query.charAt(g + 1) & 0xFC00)) {
                //  surrogate pair: rebuild the code point and consume the low surrogate
                l = (1 << 16) + ((l & 0x03FF) << 10) + (query.charAt(++g) & 0x03FF);
                e[f++] = l >> 18 | 0xF0;        //  11110xxx
                e[f++] = l >> 12 & 0x3F | 0x80; //  10xxxxxx
                e[f++] = l >> 6 & 0x3F | 0x80;  //  10xxxxxx (this byte was previously dropped)
                e[f++] = l & 0x3F | 0x80;       //  10xxxxxx
            } else {
                e[f++] = l >> 12 | 0xE0;        //  1110xxxx
                e[f++] = l >> 6 & 0x3F | 0x80;  //  10xxxxxx
                e[f++] = l & 0x3F | 0x80;       //  10xxxxxx
            }
        }

        // f is the number of bytes actually written (0 for an empty query).
        return java.util.Arrays.copyOf(e, f);
    }

    /**
     * Reduces a hash value to its last six decimal digits.
     *
     * @param encondindRound2 hash value; a negative value is first mapped to its
     *                        low 31 bits plus 0x80000000
     * @return the value modulo 1,000,000
     */
    public long normalizeHash(long encondindRound2) {
        if (encondindRound2 < 0) {
            encondindRound2 = (encondindRound2 & 0x7fffffff) + 0x80000000L;
        }
        return encondindRound2 % 1_000_000;
    }

    /**
     * Computes the "tk" token for a query.
     *
     * EXAMPLE:
     *   INPUT:  query: "hola", windowTkk: "409837.2120040981"
     *   OUTPUT: "70528.480109"
     *
     * @param query     text the token is computed for
     * @param windowTkk TKK string in "index.key" form; a part that is missing or
     *                  not numeric is logged and treated as 0
     * @return token in the form "&lt;hash&gt;.&lt;hash XOR index&gt;"
     */
    public String calcHash(String query, String windowTkk) {
        //  STEP 1: spread the query chars over a byte array
        int[] bytesArray = transformQuery(query);

        //  STEP 2: parse the TKK index and key, falling back to 0 on bad input
        String[] d = windowTkk.split("\\.");
        int tkkIndex = 0;
        try {
            tkkIndex = Integer.valueOf(d[0]);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            e.printStackTrace();
        }

        long tkkKey = 0;
        try {
            tkkKey = Long.valueOf(d[1]);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            e.printStackTrace();
        }

        //  STEP 3: starting with the TKK index, add the bytes one by one,
        //  applying two shift+add/xor steps after each
        String[] perByteOps = new String[] {"+-a", "^+6"};
        long encondingRound1 = tkkIndex;
        for (int b : bytesArray) {
            encondingRound1 = shiftLeftOrRightThenSumOrXor(encondingRound1 + b, perByteOps);
        }

        //  STEP 4: apply three more shift+add/xor steps and XOR with the TKK key (32-bit)
        long encondingRound2 = ((int) shiftLeftOrRightThenSumOrXor(encondingRound1,
                new String[] {"+-3", "^+b", "+-f"})) ^ ((int) tkkKey);

        //  STEP 5: normalize and format
        long normalizedResult = normalizeHash(encondingRound2);

        return String.valueOf(normalizedResult) + "."
                + (((int) normalizedResult) ^ tkkIndex);
    }
}