text_classification.php 2.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
<?php
// Number of samples sent to the inference service in one HTTP request.
// The constant name must be passed to define() as a quoted string: a bare
// BATCH_SIZE is itself an undefined constant (fatal Error on PHP 8, warning
// on PHP 7). The value is an integer because it is used as a batch length.
define('BATCH_SIZE', 10);

/**
 * Read a data-set file and append one parsed sample/label pair per line.
 *
 * Every "(", ")", "[" and "]" is stripped from a line, the remainder is split
 * on "," and each field is cast to int. The last integer of the line becomes
 * the label; all integers before it become the sample's ids. Lines that are
 * empty after stripping are skipped.
 *
 * @param string $data_file Path of the file to read.
 * @param array  $samples   Output: one array("ids" => int[]) is appended per line.
 * @param array  $labels    Output: one single-element array holding the label
 *                          is appended per line.
 * @return int|null -1 if the file cannot be opened or reading stops before
 *                  end of file; nothing (null) on success.
 */
function read_data($data_file, &$samples, &$labels) {
    $handle = fopen($data_file, "r");
    if ($handle === false) {
        // Without this guard fgets() would be called on false.
        echo "Cannot open data file: " . $data_file . "\n";
        return -1;
    }

    $search = array("(", ")", "[", "]");

    while (($buffer = fgets($handle)) !== false) {
        $buffer = str_replace($search, "", $buffer);
        if (trim($buffer) === "") {
            // A blank line would otherwise produce an empty sample labelled 0.
            continue;
        }

        $ids = array_map('intval', explode(",", $buffer));

        // The trailing field is the label; it stays wrapped in an array so the
        // shape of each $labels entry is unchanged. What remains in $ids after
        // the pop is the sample itself.
        $label = array(array_pop($ids));
        $samples[] = array("ids" => $ids);
        $labels[] = $label;
    }

    // Record the end-of-file state first so the handle is closed on both the
    // success path and the failure path.
    $reached_eof = feof($handle);
    fclose($handle);

    if (!$reached_eof) {
        echo "Unexpected fgets() fail";
        return -1;
    }
}

/**
 * Create a cURL handle configured to POST JSON to the given URL.
 *
 * The handle is only configured here; no request is sent.
 *
 * @param string $url Target URL of the service endpoint.
 * @return mixed The value returned by curl_init(): a cURL handle, or false if
 *               initialisation failed.
 */
function &http_connect($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // Nothing to configure; hand the failure back to the caller.
        return $ch;
    }

    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
    // Make curl_exec() return the response body instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Each CURLOPT_HTTPHEADER call replaces the whole custom header list, so
    // every header has to be supplied in one call. Setting them separately
    // silently dropped the "Expect:" entry.
    curl_setopt($ch,
            CURLOPT_HTTPHEADER,
            array(
                // An empty Expect header stops curl from sending "Expect: 100-continue".
                'Expect:',
                'Content-Type: application/json'
            )
    );

    return $ch;
}

/**
 * JSON-encode $data and POST it through an already configured cURL handle.
 *
 * @param mixed $ch   cURL handle, e.g. the one returned by http_connect().
 * @param mixed $data Value to encode as the JSON request body.
 * @return mixed The result of curl_exec() on the handle, or false if $data
 *               cannot be JSON-encoded.
 */
function http_post(&$ch, $data) {
    // array to json string
    $data_string = json_encode($data);
    if ($data_string === false) {
        // Do not send a request with an empty body when encoding failed.
        return false;
    }

    // Attach the encoded payload as the request body.
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data_string);

    // CURLOPT_HTTPHEADER replaces the complete custom header list on the
    // handle. Sending only Content-Length therefore discarded the JSON content
    // type and the "Expect:" override, so the full set is supplied here.
    curl_setopt($ch,
            CURLOPT_HTTPHEADER,
            array(
                'Content-Type: application/json',
                'Expect:',
                // strlen() counts bytes, which is what Content-Length requires.
                'Content-Length: ' . strlen($data_string)
            )
    );

    // Perform the request.
    return curl_exec($ch);
}

// Script entry point: load the data set named on the command line and send it
// to the inference service in batches, printing each response.
if ($argc != 2) {
    echo "Usage: php text_classification.php DATA_SET_FILE\n";
    // A top-level return would end the script with exit status 0; report the
    // usage error with a non-zero status instead.
    exit(1);
}

// Remove the memory limit: the whole data set is held in memory at once.
ini_set('memory_limit', '-1');

$samples = array();
$labels = array();
if (read_data($argv[1], $samples, $labels) === -1) {
    // read_data() has already printed the reason.
    exit(1);
}
echo count($samples) . "\n";

$ch = &http_connect('http://127.0.0.1:8010/TextClassificationService/inference');
if ($ch === false) {
    echo "Unable to initialise cURL\n";
    exit(1);
}

// Send every sample, including a final batch that is shorter than BATCH_SIZE.
// The previous bound ($i < count($samples) - BATCH_SIZE) never sent the last
// batch and sent nothing at all when there were at most BATCH_SIZE samples.
foreach (array_chunk($samples, (int) BATCH_SIZE) as $instances) {
    echo http_post($ch, array("instances" => $instances)) . "\n";
}

curl_close($ch);

?>