{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Hate Speech - Ethiopia.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyMrbg9XCwH/rahVzjhlwI0Y",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"metadata": {
"id": "0GZ0Y0S_Nv3m"
},
"source": [
"import pandas as pd"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "d-AgFL-yR7eR"
},
"source": [
"# Load the terms table from Terms.csv into the `terms` DataFrame. The absolute /content path presumably points at the Colab session's file area (TODO confirm) - the file has to be present there before this cell runs.\n",
"terms = pd.read_csv('/content/Terms.csv')"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"id": "pucm09BHTbd3",
"outputId": "3a2a2aac-de5c-4285-cee3-d8b1cc1c64ef"
},
"source": [
"terms.head()"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n", " | List_the_hate_speech_phrase_with_a_comma | \n", "a_Term | \n", "a_Term_001 | \n", "a_Term_002 | \n", "
---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "
\n", " | List_the_hate_speech_phrase_with_a_comma | \n", "a_Term | \n", "a_Term_001 | \n", "a_Term_002 | \n", "
---|---|---|---|---|
count | \n", "482 | \n", "482 | \n", "284 | \n", "185 | \n", "
unique | \n", "481 | \n", "403 | \n", "246 | \n", "168 | \n", "
top | \n", "ETRANGER ; RELIGION ; POLITIQUE ; | \n", "ETRANGER | \n", "BENGUE | \n", "BENGUE | \n", "
freq | \n", "2 | \n", "11 | \n", "5 | \n", "5 | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "
---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "
5 | \n", "MO KPA GUI A ALBUNOS ; MO GUI MBI FURU NA MO ... | \n", "MO KPA GUI A ALBUNOS | \n", "NaN | \n", "NaN | \n", "
6 | \n", "RELIGION ; ETHNIQUE , FOOT-BALL | \n", "RELIGION | \n", "ETHNIQUE | \n", "NaN | \n", "
7 | \n", "VOYOUTISME ; IMPURE ; MECREANT ; | \n", "VOYOUTISME | \n", "IMPURE | \n", "NaN | \n", "
8 | \n", "KPANDA ; LE TI BONGO TI MO OKO SO KOUE ; MO S... | \n", "kPANDA | \n", "LE TI BONGO T I MO OKO SO KOUE | \n", "MO SO MO YEKE NA GNE | \n", "
9 | \n", "FAUSSEUR ; HOMO SEXUEL ; BARBARIE ; | \n", "FAUSSEUR | \n", "HOMO SEXUEL | \n", "BARBARIE | \n", "
10 | \n", "CONSIDERER LES MUSULMANS COMME LES TERRORISTES | \n", "IDIOT | \n", "BANDAYE | \n", "NaN | \n", "
11 | \n", "BRAQUEUR ; DESORDONNE ; FOU ; | \n", "BRAQUEUR | \n", "DESORDONNE | \n", "NaN | \n", "
12 | \n", "EGALITE ENTRE LES SEXSES ; GUERRE ; NATIONNALI... | \n", "EGALITE ENTRE LES SEXES | \n", "DROIT DE L'ENFANT | \n", "NaN | \n", "
13 | \n", "CRFISE DE COVID 19 ; CRISE ECONOMIQUE ; | \n", "GROUPE DE BANDITS | \n", "ILLETRE | \n", "NaN | \n", "
14 | \n", "Balaka, Seleka, a baba so | \n", "Balaka | \n", "Seleka | \n", "Ti ala a baba so | \n", "
15 | \n", "Gagango, arabou, soukoula bi | \n", "Gagango | \n", "Arabou | \n", "Soukoula mbi | \n", "
16 | \n", "SARANGA ; BORDEL | \n", "SARANGA | \n", "NaN | \n", "NaN | \n", "
17 | \n", "A GA GANGO ; ALA GA LAWA ; MO NI SO KOUE LA | \n", "A GA GANGO | \n", "ALA GA LAWA | \n", "NaN | \n", "
18 | \n", "ESCROC ; IDIOT ; I MOU MO | \n", "ESCROC | \n", "NaN | \n", "NaN | \n", "
19 | \n", "L'insulte,la division, le racisme, l'ethnocent... | \n", "Idiot | \n", "Bon à rien! | \n", "Âne | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "no_contract | \n", "
---|---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ;,] | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "[rien] | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES;, L... | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "no_contract | \n", "msg_str | \n", "
---|---|---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ;,] | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "[rien] | \n", "rien | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES;, L... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "no_contract | \n", "msg_str | \n", "tokenized | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ;,] | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ... | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "[rien] | \n", "rien | \n", "[rien] | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES;, L... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES, ;,... | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "no_contract | \n", "msg_str | \n", "tokenized | \n", "lower | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ;,] | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ... | \n", "[saranga, ti, wali, ;, taxi, -moto, ;, voyou, ... | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "[rien] | \n", "rien | \n", "[rien] | \n", "[rien] | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "[haine, ;, religion, ;, ethnique, ;] | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "[tetue, ;, voyou, ;, mo, yinga, mbi, ?] | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES;, L... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES, ;,... | \n", "[les, gbakas, mandja, sont, trop, egoistes, ;,... | \n", "
\n", " | terms_list | \n", "term_1 | \n", "term_2 | \n", "term_3 | \n", "no_contract | \n", "msg_str | \n", "tokenized | \n", "lower | \n", "no_punc | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "SARANGA TI WALI | \n", "TAXI-MOTO | \n", "VOYOUX | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ;,] | \n", "SARANGA TI WALI ; TAXI -MOTO ; VOYOU ;, | \n", "[SARANGA, TI, WALI, ;, TAXI, -MOTO, ;, VOYOU, ... | \n", "[saranga, ti, wali, ;, taxi, -moto, ;, voyou, ... | \n", "[saranga, ti, wali, taxi, -moto, voyou] | \n", "
1 | \n", "rien | \n", "rien | \n", "NaN | \n", "NaN | \n", "[rien] | \n", "rien | \n", "[rien] | \n", "[rien] | \n", "[rien] | \n", "
2 | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "HAINE | \n", "NaN | \n", "NaN | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "HAINE ; RELIGION ; ETHNIQUE ; | \n", "[HAINE, ;, RELIGION, ;, ETHNIQUE, ;] | \n", "[haine, ;, religion, ;, ethnique, ;] | \n", "[haine, religion, ethnique] | \n", "
3 | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "TETUE | \n", "NaN | \n", "NaN | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "TETUE ; VOYOU ; MO YINGA MBI ? | \n", "[TETUE, ;, VOYOU, ;, MO, YINGA, MBI, ?] | \n", "[tetue, ;, voyou, ;, mo, yinga, mbi, ?] | \n", "[tetue, voyou, mo, yinga, mbi] | \n", "
4 | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES | \n", "LES YAKOMAS SONT DES ORGUEILLEUX | \n", "LES MANDJA SONT DES GRANDS VOLEURS ; | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES;, L... | \n", "LES GBAKAS MANDJA SONT TROP EGOISTES; LES YAKO... | \n", "[LES, GBAKAS, MANDJA, SONT, TROP, EGOISTES, ;,... | \n", "[les, gbakas, mandja, sont, trop, egoistes, ;,... | \n", "[les, gbakas, mandja, sont, trop, egoistes, le... | \n", "