Skip to content

Dabidou025 #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ jobs:
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
flake8 . --count --max-complexity=10 --max-line-length=80 --statistics
- name: Check doc style with pydocstyle
run: pydocstyle

13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Assignment 2 for the DataCamp course X-DataScience Master
# Assignment 2 for the DataCamp course X-DataScience Master - pandas

## What we want you to learn by doing this assignment:

Expand All @@ -10,11 +10,16 @@

## How?

- For the repository by clicking on the `Fork` button on the upper right corner
- Fork the repository by clicking on the `Fork` button on the upper right corner
- Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment-pandas` (replace MYLOGIN with your GitHub login)
- Create a branch called `myassignment` using `git checkout -b myassignment`
- Create a branch called `MYLOGIN` using `git checkout -b MYLOGIN`
- Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`.
- Open the pull request on GitHub
- Check locally that your solution meets the tests by running `pytest` from the root of the repo. You may need to install `pytest` using `pip` or `conda`.
- Check the code formatting for your solution using `flake8`. You may need to install `flake8` using `pip` or `conda`.
- Open the pull request on GitHub:
- Create a commit with `git add -u` and `git commit -m "UP my solution"`
- Push your branch on your fork: `git push -u origin MYLOGIN`
- Go to your repo in your browser and click the `Open a PR` button.
- Keep pushing to your branch until the continuous integration system is green.
- When it is green, notify the professors on Slack that you're done.

Expand Down
44 changes: 34 additions & 10 deletions pandas_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@

def load_data():
    """Load data from the CSV files referendum/regions/departments.

    The files are read from the ``data/`` directory, resolved relative to
    the current working directory.

    Returns
    -------
    referendum : pd.DataFrame
        Content of ``data/referendum.csv`` (semicolon-separated).
    regions : pd.DataFrame
        Content of ``data/regions.csv`` (comma-separated).
    departments : pd.DataFrame
        Content of ``data/departments.csv`` (comma-separated).
    """
    # The referendum file uses ';' as delimiter, unlike the two others.
    referendum = pd.read_csv('data/referendum.csv', sep=';')
    regions = pd.read_csv('data/regions.csv', sep=',')
    departments = pd.read_csv('data/departments.csv', sep=',')

    return referendum, regions, departments

Expand All @@ -29,7 +29,14 @@ def merge_regions_and_departments(regions, departments):
['code_reg', 'name_reg', 'code_dep', 'name_dep']
"""

return pd.DataFrame({})
df = departments.merge(regions, left_on='region_code', right_on='code')
df = df[['region_code','name_y','code_x','name_x']]
df = df.rename(columns={'region_code': 'code_reg',
'name_y': 'name_reg',
'code_x': 'code_dep',
'name_x': 'name_dep'})

return df


def merge_referendum_and_areas(referendum, regions_and_departments):
Expand All @@ -38,8 +45,10 @@ def merge_referendum_and_areas(referendum, regions_and_departments):
You can drop the lines relative to DOM-TOM-COM departments, and the
french living abroad.
"""
referendum["Department code"] = referendum["Department code"].str.zfill(2)
df = pd.merge(regions_and_departments, referendum, left_on='code_dep', right_on='Department code', how='inner')

return pd.DataFrame({})
return df


def compute_referendum_result_by_regions(referendum_and_areas):
Expand All @@ -49,20 +58,35 @@ def compute_referendum_result_by_regions(referendum_and_areas):
['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']
"""

return pd.DataFrame({})
df = referendum_and_areas.groupby(['name_reg', 'code_reg'], as_index=False).agg('sum')
df = df.drop('Town code', axis=1)
df = df.set_index("code_reg")

return df


def plot_referendum_map(referendum_result_by_regions):
    """Plot a map with the results from the referendum.

    * Load the geographic data with geopandas from `regions.geojson`.
    * Merge these info into `referendum_result_by_regions`.
    * Use the method `GeoDataFrame.plot` to display the result map. The
      results should display the rate of 'Choice A' over all expressed
      ballots.
    * Return a gpd.GeoDataFrame with a column 'ratio' containing the results.

    Parameters
    ----------
    referendum_result_by_regions : pd.DataFrame
        Per-region totals. It must expose 'code_reg' (as a column or as the
        name of its index) and the columns 'Choice A', 'Registered',
        'Abstentions' and 'Null'.

    Returns
    -------
    gpd.GeoDataFrame
        The region geometries read from ``data/regions.geojson`` joined with
        the referendum totals, plus the computed 'ratio' column.
    """
    geo_regions = gpd.read_file('data/regions.geojson')

    # Inner join: only regions present on both sides are kept.
    geo_referendum = geo_regions.merge(
        referendum_result_by_regions,
        left_on='code', right_on='code_reg',
        how='inner',
    )

    # Expressed ballots are taken as the registered voters minus the
    # abstentions and the null ballots.
    expressed = (
        geo_referendum['Registered']
        - geo_referendum['Abstentions']
        - geo_referendum['Null']
    )
    geo_referendum['ratio'] = geo_referendum['Choice A'] / expressed

    geo_referendum.plot(
        column='ratio', legend=True,
        legend_kwds={'label': "choice_A_ratio",
                     'orientation': "horizontal"},
    )
    return geo_referendum


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy
pandas
pytest
geopandas
descartes
20 changes: 10 additions & 10 deletions test_pandas_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,29 +69,29 @@ def test_compute_referendum_result_by_regions():
referendum_and_areas = merge_referendum_and_areas(
referendum, regions_and_departments
)
referendum_result_by_region = compute_referendum_result_by_regions(
referendum_result_by_regions = compute_referendum_result_by_regions(
referendum_and_areas
)

# Check result shape
assert set(referendum_result_by_region.columns) == set([
assert set(referendum_result_by_regions.columns) == set([
'name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B'
]), (
'To keep the name of the region, you can use either another merge or '
'a clever groupby.'
)
assert referendum_result_by_region.shape == (13, 6)
assert referendum_result_by_regions.shape == (13, 6)

# check that some of the values
referendum_result_by_region = referendum_result_by_region.set_index(
referendum_result_by_regions = referendum_result_by_regions.set_index(
'name_reg'
)
assert referendum_result_by_region['Registered'].sum() == 43_262_592
assert referendum_result_by_region.loc[
assert referendum_result_by_regions['Registered'].sum() == 43_262_592
assert referendum_result_by_regions.loc[
'Normandie', 'Abstentions'] == 426_075
assert referendum_result_by_region.loc[
assert referendum_result_by_regions.loc[
'Grand Est', 'Choice A'] == 1_088_684
assert referendum_result_by_region.loc['Occitanie', 'Null'] == 62_732
assert referendum_result_by_regions.loc['Occitanie', 'Null'] == 62_732


def test_plot_referendum_map():
Expand All @@ -102,10 +102,10 @@ def test_plot_referendum_map():
referendum_and_areas = merge_referendum_and_areas(
referendum, regions_and_departments
)
referendum_result_by_region = compute_referendum_result_by_regions(
referendum_result_by_regions = compute_referendum_result_by_regions(
referendum_and_areas
)
gdf_referendum = plot_referendum_map(referendum_result_by_region)
gdf_referendum = plot_referendum_map(referendum_result_by_regions)

assert isinstance(gdf_referendum, gpd.GeoDataFrame), (
"The return object should be a GeoDataFrame, not a "
Expand Down