count_file.md 3.3 KB
Newer Older
F
feilong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
# Python 文件统计

统计文件的行数、非空行数,以及以空格分隔的 token 数。

```python
# -*- coding: UTF-8 -*-
import json

def count_file(file):
    """Exercise skeleton: gather line and token statistics for a text file.

    The per-line token count is intentionally left unimplemented (see the
    TODO below); until it is filled in, `line_token_count` stays 0 for
    every line.

    Args:
        file: Path of the text file to read.

    Returns:
        A dict with the keys 'file', 'line_count', 'line_token_count'
        (the token total accumulated over all lines) and
        'non_empty_line_count' (lines with at least one token).
    """
    line_count = 0
    non_empty_line_count = 0
    token_count = 0

    with open(file, 'r') as f:
        while True:
            # Read the next line; an empty string means end of file.
            line = f.readline()
            if not line:
                break

            line_count += 1
            line_len = len(line)
            line_token_count = 0

            # TODO(You): implement the token count for this single line here

            token_count += line_token_count
            # A line counts as non-empty only if it produced at least one token.
            if line_token_count > 0:
                non_empty_line_count += 1

    return {
        'file': file,
        'line_count': line_count,
        'line_token_count': token_count,
        'non_empty_line_count': non_empty_line_count
    }

if __name__ == '__main__':
    # Run the counter on 'count_file.py', print the three totals, then save
    # the whole result dict as pretty-printed JSON.
    stats = count_file('count_file.py')
    report_rows = (
        ('行数:', 'line_count'),
        ('非空行:', 'non_empty_line_count'),
        ('非空词数:', 'line_token_count'),
    )
    for label, key in report_rows:
        print(label, stats[key])

    with open('/tmp/count.json', 'w') as out:
        serialized = json.dumps(stats, indent=2, ensure_ascii=False)
        out.write(serialized)
```

请选出下列能**正确**实现这一功能的选项。

## template

```python
import json


def count_file(file):
    """Count lines, non-empty lines and blank-separated tokens in a text file.

    A token is a maximal run of characters that are not separators. The
    separators are space, tab, backspace and the line terminators, so an
    empty or whitespace-only line yields zero tokens and is not counted as
    a non-empty line.

    Args:
        file: Path of the text file to read.

    Returns:
        A dict with the keys 'file', 'line_count', 'line_token_count'
        (the token total over the whole file) and 'non_empty_line_count'
        (lines containing at least one token).
    """
    # Line terminators must be separators: every line read from the file
    # (except possibly the last) ends with '\n', which would otherwise be
    # counted as a token of its own on blank lines.
    separators = frozenset(' \t\b\r\n')

    line_count = 0
    non_empty_line_count = 0
    token_count = 0

    with open(file, 'r') as f:
        for line in f:
            line_count += 1

            # Start in the "blank" state so that a token at the very
            # beginning of the line is counted.
            blank = True
            line_token_count = 0
            for char in line:
                if char in separators:
                    blank = True
                elif blank:
                    # Transition blank -> non-blank: a new token starts here.
                    line_token_count += 1
                    blank = False

            token_count += line_token_count
            if line_token_count > 0:
                non_empty_line_count += 1

    return {
        'file': file,
        'line_count': line_count,
        'line_token_count': token_count,
        'non_empty_line_count': non_empty_line_count
    }


if __name__ == '__main__':
    # Run the counter on 'count_file.py', print the three totals, then save
    # the whole result dict as pretty-printed JSON.
    stats = count_file('count_file.py')
    report_rows = (
        ('行数:', 'line_count'),
        ('非空行:', 'non_empty_line_count'),
        ('非空词数:', 'line_token_count'),
    )
    for label, key in report_rows:
        print(label, stats[key])

    with open('/tmp/count.json', 'w') as out:
        serialized = json.dumps(stats, indent=2, ensure_ascii=False)
        out.write(serialized)
```

## 答案

```python
# Walk the line once and count a token at every blank -> non-blank
# transition. The scan starts in the blank state so that a token at the very
# beginning of the line is counted, and line terminators are treated as
# blanks so that an empty line yields zero tokens.
blank = True
for char in line:
    if char in [' ', '\t', '\b', '\n', '\r']:
        blank = True
    else:
        if blank:
            line_token_count += 1
        blank = False
```

## 选项

### A

```python
# Index-based scan of the line: a token is counted whenever a non-blank
# character is seen while in the blank state (the scan starts in that state).
# Only space, tab and backspace are treated as blank characters here.
blank = True
i = 0
while i < line_len:
    char = line[i]
    if char in [' ', '\t', '\b']:
        blank = True
    else:
        if blank:
            line_token_count += 1
        blank = False
    i += 1
```

### B

```python
# Index-based scan of the line: a token is counted whenever a blank
# character (space, tab, backspace or newline) is seen directly after a
# non-blank character.
blank = True
i = 0
while i < line_len:
    char = line[i]
    if char in [' ', '\t', '\b', '\n']:
        if not blank:
            line_token_count += 1
        blank = True
    else:
        blank = False
    i += 1
```

### C

```python
# Character-by-character scan of the line: a token is counted whenever a
# blank character (space, tab, backspace or newline) is seen directly after
# a non-blank character.
blank = True
for char in line:
    if char in [' ', '\t', '\b', '\n']:
        if not blank:
            line_token_count += 1
        blank = True
    else:
        blank = False
```