diff --git "a/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.json" "b/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.json" index 2354915ff52965300feaae9c48b3d1759436a20e..237918b7879bef28f8f93b13fe8b5b2aabf6e0b4 100644 --- "a/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.json" +++ "b/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.json" @@ -1,6 +1,7 @@ { - "source": "stat.py", - "depends": [], - "exercise_id": 125, - "type": "code_options" + "author": "huanhuilong", + "source": "stat.md", + "depends": [], + "exercise_id": 125, + "type": "code_options" } \ No newline at end of file diff --git "a/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.md" "b/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.md" new file mode 100644 index 0000000000000000000000000000000000000000..95a4c8301856c326a24d6649f309b1e18e4416bd --- /dev/null +++ 
"b/data/3.python\351\253\230\351\230\266/2.\347\273\223\346\236\204\345\214\226\346\225\260\346\215\256\345\210\206\346\236\220\345\267\245\345\205\267Pandas/4.\351\253\230\347\272\247\345\272\224\347\224\250/stat.md" @@ -0,0 +1,118 @@ +# pandas dataframe之apply + +apply的使用：下列代码中，对 apply 的使用错误的是哪一项？ + + +## template + +```python +import pandas as pd +import numpy as np + +def add_val(num): + if num > 0: + return 1 + else: + return 0 + + +if __name__ == '__main__': + data = { + 'cloumn_one': pd.Series(np.random.randint(-10, 10, size=5)), + 'cloumn_two': pd.Series(np.random.randint(0, 10, size=5)), + 'cloumn_three': pd.Series(np.random.randint(0, 10, size=5)) + } + data_df = pd.DataFrame(data) + + data_df['label_one'] = data['cloumn_one'].apply(add_val) + + print(data_df) +``` + +## 答案 + +```python +import pandas as pd +import numpy as np + +if __name__ == '__main__': + data = { + 'cloumn_one': pd.Series(np.random.randint(-10, 10, size=5)), + 'cloumn_two': pd.Series(np.random.randint(0, 10, size=5)), + 'cloumn_three': pd.Series(np.random.randint(0, 10, size=5)) + } + data_df = pd.DataFrame(data) + + data_df['label_one'] = data['cloumn_one'].apply['num'>1] + + print(data_df) +``` + +## 选项 + +### A + +```python +import pandas as pd +import numpy as np + +def add_val(num): + if num > 0: + return 1 + else: + return 0 + + +if __name__ == '__main__': + data = { + 'cloumn_one': pd.Series(np.random.randint(-10, 10, size=5)), + 'cloumn_two': pd.Series(np.random.randint(0, 10, size=5)), + 'cloumn_three': pd.Series(np.random.randint(0, 10, size=5)) + } + data_df = pd.DataFrame(data) + + data_df['label_one'] = data['cloumn_one'].apply(add_val) + + print(data_df) +``` + +### B + +```python +import pandas as pd +import numpy as np + +if __name__ == '__main__': + data = { + 'cloumn_one': pd.Series(np.random.randint(-10, 10, size=5)), + 'cloumn_two': pd.Series(np.random.randint(0, 10, size=5)), + 'cloumn_three': pd.Series(np.random.randint(0, 10, size=5)) + } + data_df = pd.DataFrame(data) + + 
data_df['label_one'] = data['cloumn_one'].apply(lambda num: 1 if num>0 else 0) + + print(data_df) +``` + +### C + +```python +import pandas as pd +import numpy as np + +if __name__ == '__main__': + data = { + 'cloumn_one': pd.Series(np.random.randint(-10, 10, size=5)), + 'cloumn_two': pd.Series(np.random.randint(0, 10, size=5)), + 'cloumn_three': pd.Series(np.random.randint(0, 10, size=5)) + } + data_df = pd.DataFrame(data) + + def add_val(num): + return 1 if num>0 else 0 + + data_df['label_one'] = data['cloumn_one'].apply(add_val) + + print(data_df) +```