Files
blog/static/yWEXvw.html
Himadri Bhattacharjee 5acf14987f a nother thing
2025-10-06 09:10:12 +05:30

97 lines
43 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/favicon.ico" />
<!-- Preload is necessary because we show these images when we disconnect from the server,
but at that point we cannot load these images from the server -->
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/gradient-yHQUC_QB.png" as="image" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/noise-60BoTA8O.png" as="image" />
<!-- Preload the fonts -->
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/Lora-VariableFont_wght-B2ootaw-.ttf" as="font" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/PTSans-Regular-CxL0S8W7.ttf" as="font" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/PTSans-Bold-D9fedIX3.ttf" as="font" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/FiraMono-Regular-BTCkDNvf.ttf" as="font" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/FiraMono-Medium-DU3aDxX5.ttf" as="font" />
<link rel="preload" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/FiraMono-Bold-CLVRCuM9.ttf" as="font" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta name="description" content="a marimo app" />
<link rel="apple-touch-icon" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/apple-touch-icon.png" />
<link rel="manifest" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/manifest.json" />
<script data-marimo="true">
/**
 * Grow an embedded iframe so its whole document is visible without an inner
 * vertical scrollbar, and keep the height in sync when the content resizes.
 *
 * @param {Object} obj - The iframe element; must expose `contentWindow`
 *   (with an accessible `document`) and a writable `style.height`.
 */
function __resizeIframe(obj) {
  // Extra room reserved for a horizontal scrollbar (max of Windows, macOS, Linux).
  var SCROLLBAR_PX = 20;

  function syncHeight() {
    var root = obj.contentWindow.document.documentElement;
    // Only act when the content overflows vertically.
    if (root.scrollHeight !== root.clientHeight) {
      // Leave space for the horizontal scrollbar when one is showing.
      var extra = root.scrollWidth > root.clientWidth ? SCROLLBAR_PX : 0;
      var target = (root.scrollHeight + extra) + "px";
      // Skip the write when the height is already correct.
      if (obj.style.height !== target) {
        obj.style.height = target;
      }
    }
  }

  // Initial fit to the current content.
  syncHeight();

  // Re-fit whenever the iframe body changes size.
  var bodyObserver = new ResizeObserver(() => {
    syncHeight();
  });
  bodyObserver.observe(obj.contentWindow.document.body);
}
</script>
<marimo-filename hidden>milestone-1.py</marimo-filename>
<!-- TODO(Trevor): Legacy, required by VS Code plugin. Remove when plugin is updated (see marimo/server/_templates/template.py) -->
<marimo-version data-version="{{ version }}" hidden></marimo-version>
<marimo-user-config data-config="{{ user_config }}" hidden></marimo-user-config>
<marimo-server-token data-token="{{ server_token }}" hidden></marimo-server-token>
<!-- /TODO -->
<title>milestone-1</title>
<script type="module" crossorigin="anonymous" src="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/index-BB71mG_d.js"></script>
<link rel="stylesheet" crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/@marimo-team/frontend@0.15.2/dist/assets/index-DryPQDfA.css">
<script data-marimo="true">
// Create the global `__MARIMO_STATIC__` object with an empty `files` map.
window.__MARIMO_STATIC__ = { files: {} };
</script>
</head>
<body>
<div id="root"></div>
<!-- This is a portal for the data editor to render in -->
<div id="portal" data-testid="glide-portal" style="position: fixed; left: 0; top: 0; z-index: 9999"></div>
<script data-marimo="true">
window.__MARIMO_MOUNT_CONFIG__ = {
"filename": "milestone-1.py",
"mode": "read",
"version": "0.15.2",
"serverToken": "static",
"config": {"ai": {"models": {"custom_models": [], "displayed_models": []}}, "completion": {"activate_on_typing": true, "copilot": false}, "display": {"cell_output": "above", "code_editor_font_size": 14, "dataframes": "rich", "default_table_max_columns": 50, "default_table_page_size": 10, "default_width": "medium", "reference_highlighting": false, "theme": "light"}, "formatting": {"line_length": 79}, "keymap": {"overrides": {}, "preset": "default"}, "language_servers": {"pylsp": {"enable_flake8": false, "enable_mypy": true, "enable_pydocstyle": false, "enable_pyflakes": false, "enable_pylint": false, "enable_ruff": true, "enabled": true}}, "package_management": {"manager": "pip"}, "runtime": {"auto_instantiate": true, "auto_reload": "off", "default_sql_output": "auto", "on_cell_change": "autorun", "output_max_bytes": 8000000, "reactive_tests": true, "std_stream_max_bytes": 1000000, "watcher_on_save": "lazy"}, "save": {"autosave": "after_delay", "autosave_delay": 1000, "format_on_save": false}, "server": {"browser": "default", "follow_symlink": false}, "snippets": {"custom_paths": [], "include_default_snippets": true}},
"configOverrides": {},
"appConfig": {"sql_output": "auto", "width": "compact"},
"view": {"showAppCode": true},
"notebook": {"cells": [{"code": "import numpy as np\nimport pandas as pd\nfrom pathlib import Path", "code_hash": "aa00d7d5ce2ff059997c6ea778050566", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "Hbol", "name": "_"}, {"code": "import os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))", "code_hash": "c4ba1e49470be02d82d3a3f8ebbbc051", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "MJUe", "name": "_"}, {"code": "mo.md(r\"\"\"## Exploratory Data Analysis\"\"\")", "code_hash": "7c981643a0add3b9f5f279306c46bc88", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "vblA", "name": "_"}, {"code": "mo.md(r\"\"\"A simple function to load the data from the respective CSV files without the hassle of writing the full path.\"\"\")", "code_hash": "947aa505aa5fd3d4d45ed44750ab34fd", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "bkHC", "name": "_"}, {"code": "def load_df(name: str) -> pd.DataFrame:\n return pd.read_csv(Path('Cinema_Audience_Forecasting_challenge') / name / f'{name}.csv')", "code_hash": "c0205b362ee11f27b1ac6041585bcc41", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "lEQa", "name": "_"}, {"code": "mo.md(r\"\"\"To organize the different dataframes, we use simple namespaces\"\"\")", "code_hash": "b17a5c4d47464bed3f6542e67bb40cc5", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "PKri", "name": "_"}, {"code": "from types import SimpleNamespace", "code_hash": "9990b3c94ada7878ab52be7021d65854", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "Xref", "name": "_"}, {"code": "booknow = SimpleNamespace(\n visits=load_df('booknow_visits'),\n booking=load_df('booknow_booking'),\n theaters=load_df('booknow_theaters')\n)", "code_hash": "2f9e0fb43674d692333caef61a67c507", "config": {"column": null, "disabled": false, 
"hide_code": false}, "id": "SFPL", "name": "_"}, {"code": "mo.md(r\"\"\"What is the shape of the dataset named booknow_booking?\"\"\")", "code_hash": "3b9d27fd52303684d2720d6861f75fb4", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "BYtC", "name": "_"}, {"code": "booknow.booking.shape", "code_hash": "ec8921136f90c5fab1f2410408ac7120", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "RGSE", "name": "_"}, {"code": "booknow.booking.columns", "code_hash": "51a28d4ae1562f4cc9a67e23df7f59b2", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "Kclp", "name": "_"}, {"code": "cinepos = SimpleNamespace(\n booking=load_df('cinePOS_booking'),\n theaters=load_df('cinePOS_theaters')\n)", "code_hash": "483c60481b2ce729f71d6736b763639b", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "emfo", "name": "_"}, {"code": "mo.md(\n r\"\"\"\nHow many columns are of type `Object` in the dataset named `cinePOS_theaters`?\n\nWe will use an ephemeral function to not pollute the global namespace.\n\"\"\"\n)", "code_hash": "a3fd5e3039401a020cfef9e6fa1a97a6", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "Hstk", "name": "_"}, {"code": "def _():\n metadata = cinepos.theaters.dtypes\n object_columns = metadata[metadata == 'object']\n print(object_columns)\n return len(object_columns)\n_()", "code_hash": "a0a211fb7cf4cfee6ed202cc1418e63f", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "nWHF", "name": "_"}, {"code": "mo.md(\n r\"\"\"\nWhich of the following theater types is the *most frequent* in the dataset named `booknow_theaters`?\n\nLet's check which column has theater types.\n\"\"\"\n)", "code_hash": "a696ab9d5a4ae3b3da1e84d87060c238", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "iLit", "name": "_"}, {"code": "booknow.theaters.columns", "code_hash": "461ebc85c44896e98a6969e572abc3e6", "config": {"column": null, 
"disabled": false, "hide_code": false}, "id": "ZHCJ", "name": "_"}, {"code": "mo.md(r\"\"\"Okay so we can use `pd.Series.mode` on the `theater_type` column.\"\"\")", "code_hash": "3f51ab85f77da67553eaee2115478964", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "ROlb", "name": "_"}, {"code": "booknow.theaters.theater_type.mode()", "code_hash": "6c37578a8b83dd35a807adfeb5d85d63", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "qnkX", "name": "_"}, {"code": "mo.md(r\"\"\"How many *distinct theaters* are present in the dataset named `cinePOS_booking`?\"\"\")", "code_hash": "3bbb6260610cb6c344c14960092141d1", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "TqIu", "name": "_"}, {"code": "cinepos.booking.columns", "code_hash": "13c1c63a39e71db6d596f51ebd549f53", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "Vxnm", "name": "_"}, {"code": "mo.md(r\"\"\"Let's check if this has any `NaN` values.\"\"\")", "code_hash": "e448866663a722594c70b538fae0459e", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "DnEU", "name": "_"}, {"code": "cinepos.booking.cine_theater_id.isna().sum()", "code_hash": "49ed922ecd063c878707de8237bed366", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "ulZA", "name": "_"}, {"code": "cinepos.booking.cine_theater_id.nunique()", "code_hash": "4cdde8e08b83793b26cd2ad9f20b971c", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "ecfG", "name": "_"}, {"code": "mo.md(r\"\"\"Combine the datasets booknow_booking and booknow_theaters. What is the shape of this dataset?\"\"\")", "code_hash": "caf621dc96741311529757d7d41773b3", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "Pvdt", "name": "_"}, {"code": "# how='inner' by default\n# on='...' 
is the intersecting column by default\n# thus, on='book_theater_id'\nbooknow.booking_theaters_inner = booknow.booking.merge(booknow.theaters)", "code_hash": "0f38befcaa2738d3f17435e118ac7aeb", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "ZBYS", "name": "_"}, {"code": "booknow.booking_theaters_inner.shape", "code_hash": "3f922f07c1cc85aad39d9b645b72d7e7", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "aLJB", "name": "_"}, {"code": "mo.md(r\"\"\"Based on the combined dataset obtained in the previous question, how many distinct areas are the theaters located in?\"\"\")", "code_hash": "616e5d603be373453cd00a3d73d25bed", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "nHfw", "name": "_"}, {"code": "booknow.booking_theaters_inner.theater_area.nunique()", "code_hash": "316635562111516d76995606aef6a781", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "xXTn", "name": "_"}, {"code": "mo.md(r\"\"\"Combine the datasets movie_theater_id_relation, cinePOS_booking and cinePOS_theaters. 
Enter the column names containing missing values as comma seperated values.\"\"\")", "code_hash": "3065beeb600cc61f898df938a0331bb7", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "AjVT", "name": "_"}, {"code": "cinepos.id_relation = load_df('movie_theater_id_relation')", "code_hash": "3a8e7432d409a241c92ccabc407ee5b7", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "pHFh", "name": "_"}, {"code": "print(cinepos.id_relation.columns)\nprint(cinepos.booking.columns)\nprint(cinepos.theaters.columns)", "code_hash": "2eb56c472584bdf9e4854cd02990dd2b", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "NCOB", "name": "_"}, {"code": "def _():\n merged = cinepos.id_relation.merge(cinepos.booking).merge(cinepos.theaters)\n isna = merged.isna().sum()\n print(isna)\n print(\"missing value columns:\", ','.join(isna[isna > 0].index))\n_()", "code_hash": "6e2864f0a95682822e12025f19b6c649", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "aqbW", "name": "_"}, {"code": "mo.md(r\"\"\"Enter the date range spanned in the dataset named booknow_booking.\"\"\")", "code_hash": "3d192b667bf02f620fe71a3b765a5bb0", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "TRpd", "name": "_"}, {"code": "booknow.booking.show_datetime = pd.to_datetime(booknow.booking.show_datetime)\nbooknow.booking.booking_datetime = pd.to_datetime(booknow.booking.booking_datetime)", "code_hash": "9692c1dbe20299216b519a6a58330227", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "TXez", "name": "_"}, {"code": "def iso_to_british_date(time):\n return time.strftime('%d-%m-%Y')", "code_hash": "3d33a7d737f10058b39022289d26f869", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "dNNg", "name": "*iso_to_british_date"}, {"code": "print(\n iso_to_british_date(booknow.booking.show_datetime.dt.date.min()),\n \"to\",\n 
iso_to_british_date(booknow.booking.show_datetime.dt.date.max())\n)", "code_hash": "48d79ba7faaa1d65ca8426b8e2449190", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "yCnT", "name": "_"}, {"code": "print(\n iso_to_british_date(booknow.booking.booking_datetime.dt.date.min()),\n \"to\",\n iso_to_british_date(booknow.booking.booking_datetime.dt.date.max())\n)", "code_hash": "c85b5bea46d59101e381beb4d9fe8ba1", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "wlCL", "name": "_"}, {"code": "mo.md(r\"\"\"Fortunately, there are no inconsitencies between the range of `show_datetime` and `booking_datetime`.\"\"\")", "code_hash": "d36d28d35a453d0852d9bdaf128efad5", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "kqZH", "name": "_"}, {"code": "mo.md(r\"\"\"Enter the date range spanned in the test dataset (sample_submission).\"\"\")", "code_hash": "710aec3e7bd2f579eb0eb4d2ec25daf8", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "wAgl", "name": "_"}, {"code": "submission = SimpleNamespace(\n sample=load_df('sample_submission')\n)", "code_hash": "1bcd79d8cad179083f1c4e57b3bdd34f", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "rEll", "name": "_"}, {"code": "submission.sample.head()", "code_hash": "c1ad4af66c48ba2524e929b56492afe8", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "dGlV", "name": "_"}, {"code": "mo.md(r\"\"\"We have to split the IDs.\"\"\")", "code_hash": "04b27e9e2e15009b6ec6c25f46f4d205", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "SdmI", "name": "_"}, {"code": "submission.input = pd.DataFrame()\nsubmission.input[['ID', 'date']] = submission.sample.ID.str.rsplit(\"_\", n=1, expand=True)\nsubmission.input.date = pd.to_datetime(submission.input.date)", "code_hash": "46462f7a3522c4cff6e5fb3cfeb29fa0", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "lgWD", 
"name": "_"}, {"code": "submission.input.head()", "code_hash": "72452685700db28d78631577d496df85", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "yOPj", "name": "_"}, {"code": "mo.md(r\"\"\"Very well, let's find the range now.\"\"\")", "code_hash": "01c265b1bc0d6ffdc5c680ea937a4fe9", "config": {"column": null, "disabled": false, "hide_code": true}, "id": "fwwy", "name": "_"}, {"code": "print(\n iso_to_british_date(submission.input.date.dt.date.min()),\n \"to\",\n iso_to_british_date(submission.input.date.dt.date.max())\n)", "code_hash": "6442a9f04a47412dedc7bfea42ece550", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "LJZf", "name": "_"}, {"code": "import marimo as mo", "code_hash": "1d0db38904205bec4d6f6f6a1f6cec3e", "config": {"column": null, "disabled": false, "hide_code": false}, "id": "urSm", "name": "_"}], "metadata": {"marimo_version": "0.15.2"}, "version": "1"},
"session": {"cells": [{"code_hash": "aa00d7d5ce2ff059997c6ea778050566", "console": [], "id": "Hbol", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "c4ba1e49470be02d82d3a3f8ebbbc051", "console": [], "id": "MJUe", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "7c981643a0add3b9f5f279306c46bc88", "console": [], "id": "vblA", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><h2 id=\"exploratory-data-analysis\">Exploratory Data Analysis</h2></span>"}, "type": "data"}]}, {"code_hash": "947aa505aa5fd3d4d45ed44750ab34fd", "console": [], "id": "bkHC", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">A simple function to load the data from the respective CSV files without the hassle of writing the full path.</span></span>"}, "type": "data"}]}, {"code_hash": "c0205b362ee11f27b1ac6041585bcc41", "console": [], "id": "lEQa", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "b17a5c4d47464bed3f6542e67bb40cc5", "console": [], "id": "PKri", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">To organize the different dataframes, we use simple namespaces</span></span>"}, "type": "data"}]}, {"code_hash": "9990b3c94ada7878ab52be7021d65854", "console": [], "id": "Xref", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "2f9e0fb43674d692333caef61a67c507", "console": [], "id": "SFPL", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "3b9d27fd52303684d2720d6861f75fb4", "console": [], "id": "BYtC", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">What is the shape of the dataset named booknow_booking?</span></span>"}, "type": "data"}]}, {"code_hash": "ec8921136f90c5fab1f2410408ac7120", "console": [], "id": "RGSE", "outputs": [{"data": 
{"application/json": "[68336, 4]"}, "type": "data"}]}, {"code_hash": "51a28d4ae1562f4cc9a67e23df7f59b2", "console": [], "id": "Kclp", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>Index([&#x27;book_theater_id&#x27;, &#x27;show_datetime&#x27;, &#x27;booking_datetime&#x27;,\n &#x27;tickets_booked&#x27;],\n dtype=&#x27;object&#x27;)</pre>"}, "type": "data"}]}, {"code_hash": "483c60481b2ce729f71d6736b763639b", "console": [], "id": "emfo", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "a3fd5e3039401a020cfef9e6fa1a97a6", "console": [], "id": "Hstk", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">How many columns are of type <code>Object</code> in the dataset named <code>cinePOS_theaters</code>?</span>\n<span class=\"paragraph\">We will use an ephemeral function to not pollute the global namespace.</span></span>"}, "type": "data"}]}, {"code_hash": "a0a211fb7cf4cfee6ed202cc1418e63f", "console": [{"name": "stdout", "text": "cine_theater_id object\ntheater_type object\ntheater_area object\ndtype: object\n", "type": "stream"}], "id": "nWHF", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>3</pre>"}, "type": "data"}]}, {"code_hash": "a696ab9d5a4ae3b3da1e84d87060c238", "console": [], "id": "iLit", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Which of the following theater types is the <em>most frequent</em> in the dataset named <code>booknow_theaters</code>?</span>\n<span class=\"paragraph\">Let's check which column has theater types.</span></span>"}, "type": "data"}]}, {"code_hash": "461ebc85c44896e98a6969e572abc3e6", "console": [], "id": "ZHCJ", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>Index([&#x27;book_theater_id&#x27;, &#x27;theater_type&#x27;, &#x27;theater_area&#x27;, &#x27;latitude&#x27;,\n &#x27;longitude&#x27;],\n dtype=&#x27;object&#x27;)</pre>"}, 
"type": "data"}]}, {"code_hash": "3f51ab85f77da67553eaee2115478964", "console": [], "id": "ROlb", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Okay so we can use <code>pd.Series.mode</code> on the <code>theater_type</code> column.</span></span>"}, "type": "data"}]}, {"code_hash": "6c37578a8b83dd35a807adfeb5d85d63", "console": [], "id": "qnkX", "outputs": [{"data": {"text/html": "<marimo-ui-element object-id='qnkX-0' random-id='d78a6e73-d76e-73a2-0df3-a33ca2ef7e82'><marimo-table data-initial-value='[]' data-label='null' data-data='&quot;[{&#92;&quot;theater_type&#92;&quot;:&#92;&quot;Other&#92;&quot;}]&quot;' data-total-rows='1' data-total-columns='1' data-max-columns='50' data-banner-text='&quot;&quot;' data-pagination='true' data-page-size='10' data-field-types='[[&quot;theater_type&quot;, [&quot;string&quot;, &quot;object&quot;]]]' data-show-filters='true' data-show-download='true' data-show-column-summaries='false' data-show-data-types='true' data-show-page-size-selector='false' data-show-column-explorer='true' data-show-chart-builder='true' data-row-headers='[]' data-has-stable-row-id='false' data-lazy='false' data-preload='false'></marimo-table></marimo-ui-element>"}, "type": "data"}]}, {"code_hash": "3bbb6260610cb6c344c14960092141d1", "console": [], "id": "TqIu", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">How many <em>distinct theaters</em> are present in the dataset named <code>cinePOS_booking</code>?</span></span>"}, "type": "data"}]}, {"code_hash": "13c1c63a39e71db6d596f51ebd549f53", "console": [], "id": "Vxnm", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>Index([&#x27;cine_theater_id&#x27;, &#x27;show_datetime&#x27;, &#x27;booking_datetime&#x27;, &#x27;tickets_sold&#x27;], dtype=&#x27;object&#x27;)</pre>"}, "type": "data"}]}, {"code_hash": "e448866663a722594c70b538fae0459e", "console": [], "id": "DnEU", 
"outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Let's check if this has any <code>NaN</code> values.</span></span>"}, "type": "data"}]}, {"code_hash": "49ed922ecd063c878707de8237bed366", "console": [], "id": "ulZA", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>np.int64(0)</pre>"}, "type": "data"}]}, {"code_hash": "4cdde8e08b83793b26cd2ad9f20b971c", "console": [], "id": "ecfG", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>13161</pre>"}, "type": "data"}]}, {"code_hash": "caf621dc96741311529757d7d41773b3", "console": [], "id": "Pvdt", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Combine the datasets booknow_booking and booknow_theaters. What is the shape of this dataset?</span></span>"}, "type": "data"}]}, {"code_hash": "0f38befcaa2738d3f17435e118ac7aeb", "console": [], "id": "ZBYS", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "3f922f07c1cc85aad39d9b645b72d7e7", "console": [], "id": "aLJB", "outputs": [{"data": {"application/json": "[36899, 8]"}, "type": "data"}]}, {"code_hash": "616e5d603be373453cd00a3d73d25bed", "console": [], "id": "nHfw", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Based on the combined dataset obtained in the previous question, how many distinct areas are the theaters located in?</span></span>"}, "type": "data"}]}, {"code_hash": "316635562111516d76995606aef6a781", "console": [], "id": "xXTn", "outputs": [{"data": {"text/html": "<pre style='font-size: 12px'>50</pre>"}, "type": "data"}]}, {"code_hash": "3065beeb600cc61f898df938a0331bb7", "console": [], "id": "AjVT", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Combine the datasets movie_theater_id_relation, cinePOS_booking and cinePOS_theaters. 
Enter the column names containing missing values as comma seperated values.</span></span>"}, "type": "data"}]}, {"code_hash": "3a8e7432d409a241c92ccabc407ee5b7", "console": [], "id": "pHFh", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "2eb56c472584bdf9e4854cd02990dd2b", "console": [{"name": "stdout", "text": "Index(['book_theater_id', 'cine_theater_id'], dtype='object')\nIndex(['cine_theater_id', 'show_datetime', 'booking_datetime', 'tickets_sold'], dtype='object')\nIndex(['cine_theater_id', 'theater_type', 'theater_area', 'latitude',\n 'longitude'],\n dtype='object')\n", "type": "stream"}], "id": "NCOB", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "6e2864f0a95682822e12025f19b6c649", "console": [{"name": "stdout", "text": "book_theater_id 0\ncine_theater_id 0\nshow_datetime 0\nbooking_datetime 0\ntickets_sold 0\ntheater_type 0\ntheater_area 0\nlatitude 9307\nlongitude 9307\ndtype: int64\nmissing value columns: latitude,longitude\n", "type": "stream"}], "id": "aqbW", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "3d192b667bf02f620fe71a3b765a5bb0", "console": [], "id": "TRpd", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Enter the date range spanned in the dataset named booknow_booking.</span></span>"}, "type": "data"}]}, {"code_hash": "9692c1dbe20299216b519a6a58330227", "console": [], "id": "TXez", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "3d33a7d737f10058b39022289d26f869", "console": [], "id": "dNNg", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "48d79ba7faaa1d65ca8426b8e2449190", "console": [{"name": "stdout", "text": "01-01-2023 to 28-02-2024\n", "type": "stream"}], "id": "yCnT", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "c85b5bea46d59101e381beb4d9fe8ba1", "console": [{"name": "stdout", "text": "01-01-2023 to 
28-02-2024\n", "type": "stream"}], "id": "wlCL", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "d36d28d35a453d0852d9bdaf128efad5", "console": [], "id": "kqZH", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Fortunately, there are no inconsitencies between the range of <code>show_datetime</code> and <code>booking_datetime</code>.</span></span>"}, "type": "data"}]}, {"code_hash": "710aec3e7bd2f579eb0eb4d2ec25daf8", "console": [], "id": "wAgl", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Enter the date range spanned in the test dataset (sample_submission).</span></span>"}, "type": "data"}]}, {"code_hash": "1bcd79d8cad179083f1c4e57b3bdd34f", "console": [], "id": "rEll", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "c1ad4af66c48ba2524e929b56492afe8", "console": [], "id": "dGlV", "outputs": [{"data": {"text/html": "<marimo-ui-element object-id='dGlV-0' random-id='a1fba1c2-ee6b-3fab-0e8f-7f63eb75df12'><marimo-table data-initial-value='[]' data-label='null' data-data='&quot;[{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001_2024-03-01&#92;&quot;,&#92;&quot;audience_count&#92;&quot;:0},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001_2024-03-02&#92;&quot;,&#92;&quot;audience_count&#92;&quot;:0},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001_2024-03-03&#92;&quot;,&#92;&quot;audience_count&#92;&quot;:0},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001_2024-03-04&#92;&quot;,&#92;&quot;audience_count&#92;&quot;:0},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001_2024-03-06&#92;&quot;,&#92;&quot;audience_count&#92;&quot;:0}]&quot;' data-total-rows='5' data-total-columns='2' data-max-columns='50' data-banner-text='&quot;&quot;' data-pagination='true' data-page-size='10' data-field-types='[[&quot;ID&quot;, [&quot;string&quot;, &quot;object&quot;]], [&quot;audience_count&quot;, [&quot;integer&quot;, 
&quot;int64&quot;]]]' data-show-filters='true' data-show-download='true' data-show-column-summaries='false' data-show-data-types='true' data-show-page-size-selector='false' data-show-column-explorer='true' data-show-chart-builder='true' data-row-headers='[]' data-has-stable-row-id='false' data-lazy='false' data-preload='false'></marimo-table></marimo-ui-element>"}, "type": "data"}]}, {"code_hash": "04b27e9e2e15009b6ec6c25f46f4d205", "console": [], "id": "SdmI", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">We have to split the IDs.</span></span>"}, "type": "data"}]}, {"code_hash": "46462f7a3522c4cff6e5fb3cfeb29fa0", "console": [], "id": "lgWD", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "72452685700db28d78631577d496df85", "console": [], "id": "yOPj", "outputs": [{"data": {"text/html": "<marimo-ui-element object-id='yOPj-0' random-id='010784f4-4ed1-6475-295b-9484a4fd42e0'><marimo-table data-initial-value='[]' data-label='null' data-data='&quot;[{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001&#92;&quot;,&#92;&quot;date&#92;&quot;:&#92;&quot;2024-03-01T00:00:00.000&#92;&quot;},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001&#92;&quot;,&#92;&quot;date&#92;&quot;:&#92;&quot;2024-03-02T00:00:00.000&#92;&quot;},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001&#92;&quot;,&#92;&quot;date&#92;&quot;:&#92;&quot;2024-03-03T00:00:00.000&#92;&quot;},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001&#92;&quot;,&#92;&quot;date&#92;&quot;:&#92;&quot;2024-03-04T00:00:00.000&#92;&quot;},{&#92;&quot;ID&#92;&quot;:&#92;&quot;book_00001&#92;&quot;,&#92;&quot;date&#92;&quot;:&#92;&quot;2024-03-06T00:00:00.000&#92;&quot;}]&quot;' data-total-rows='5' data-total-columns='2' data-max-columns='50' data-banner-text='&quot;&quot;' data-pagination='true' data-page-size='10' data-field-types='[[&quot;ID&quot;, [&quot;string&quot;, &quot;object&quot;]], [&quot;date&quot;, [&quot;datetime&quot;, 
&quot;datetime64[ns]&quot;]]]' data-show-filters='true' data-show-download='true' data-show-column-summaries='false' data-show-data-types='true' data-show-page-size-selector='false' data-show-column-explorer='true' data-show-chart-builder='true' data-row-headers='[]' data-has-stable-row-id='false' data-lazy='false' data-preload='false'></marimo-table></marimo-ui-element>"}, "type": "data"}]}, {"code_hash": "01c265b1bc0d6ffdc5c680ea937a4fe9", "console": [], "id": "fwwy", "outputs": [{"data": {"text/html": "<span class=\"markdown prose dark:prose-invert\"><span class=\"paragraph\">Very well, let's find the range now.</span></span>"}, "type": "data"}]}, {"code_hash": "6442a9f04a47412dedc7bfea42ece550", "console": [{"name": "stdout", "text": "01-03-2024 to 22-04-2024\n", "type": "stream"}], "id": "LJZf", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}, {"code_hash": "1d0db38904205bec4d6f6f6a1f6cec3e", "console": [], "id": "urSm", "outputs": [{"data": {"text/plain": ""}, "type": "data"}]}], "metadata": {"marimo_version": "0.15.2"}, "version": "1"},
"runtimeConfig": null,
};
</script>
<marimo-code hidden="">
import%20marimo%0A%0A__generated_with%20%3D%20%220.15.2%22%0Aapp%20%3D%20marimo.App()%0A%0A%0A%40app.cell%0Adef%20_()%3A%0A%20%20%20%20import%20numpy%20as%20np%0A%20%20%20%20import%20pandas%20as%20pd%0A%20%20%20%20from%20pathlib%20import%20Path%0A%20%20%20%20return%20Path%2C%20pd%0A%0A%0A%40app.cell%0Adef%20_()%3A%0A%20%20%20%20import%20os%0A%20%20%20%20for%20dirname%2C%20_%2C%20filenames%20in%20os.walk('%2Fkaggle%2Finput')%3A%0A%20%20%20%20%20%20%20%20for%20filename%20in%20filenames%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20print(os.path.join(dirname%2C%20filename))%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%20Exploratory%20Data%20Analysis%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22A%20simple%20function%20to%20load%20the%20data%20from%20the%20respective%20CSV%20files%20without%20the%20hassle%20of%20writing%20the%20full%20path.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(Path%2C%20pd)%3A%0A%20%20%20%20def%20load_df(name%3A%20str)%20-%3E%20pd.DataFrame%3A%0A%20%20%20%20%20%20%20%20return%20pd.read_csv(Path('Cinema_Audience_Forecasting_challenge')%20%2F%20name%20%2F%20f'%7Bname%7D.csv')%0A%20%20%20%20return%20(load_df%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22To%20organize%20the%20different%20dataframes%2C%20we%20use%20simple%20namespaces%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_()%3A%0A%20%20%20%20from%20types%20import%20SimpleNamespace%0A%20%20%20%20return%20(SimpleNamespace%2C)%0A%0A%0A%40app.cell%0Adef%20_(SimpleNamespace%2C%20load_df)%3A%0A%20%20%20%20booknow%20%3D%20SimpleNamespace(%0A%20%20%20%20%20%20%20%20visits%3Dload_df('booknow_visits')%2C%0A%20%20%20%20%20%20%20%20booking%3Dload_df('booknow_booking')%2C%0A%20%20%20%20%20%20%20%20theaters%3Dload_df('booknow_theaters')%0A%20%20%20%20)%0A%20%20%20%20return%20(booknow%2C)%0A%0A%0A%40app.c
ell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22What%20is%20the%20shape%20of%20the%20dataset%20named%20booknow_booking%3F%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20booknow.booking.shape%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20booknow.booking.columns%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(SimpleNamespace%2C%20load_df)%3A%0A%20%20%20%20cinepos%20%3D%20SimpleNamespace(%0A%20%20%20%20%20%20%20%20booking%3Dload_df('cinePOS_booking')%2C%0A%20%20%20%20%20%20%20%20theaters%3Dload_df('cinePOS_theaters')%0A%20%20%20%20)%0A%20%20%20%20return%20(cinepos%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20How%20many%20columns%20are%20of%20type%20%60Object%60%20in%20the%20dataset%20named%20%60cinePOS_theaters%60%3F%0A%0A%20%20%20%20We%20will%20use%20an%20ephemeral%20function%20to%20not%20pollute%20the%20global%20namespace.%0A%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20def%20_()%3A%0A%20%20%20%20%20%20%20%20metadata%20%3D%20cinepos.theaters.dtypes%0A%20%20%20%20%20%20%20%20object_columns%20%3D%20metadata%5Bmetadata%20%3D%3D%20'object'%5D%0A%20%20%20%20%20%20%20%20print(object_columns)%0A%20%20%20%20%20%20%20%20return%20len(object_columns)%0A%20%20%20%20_()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20Which%20of%20the%20following%20theater%20types%20is%20the%20*most%20frequent*%20in%20the%20dataset%20named%20%60booknow_theaters%60%3F%0A%0A%20%20%20%20Let's%20check%20which%20column%20has%20theater%20types.%0A%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20booknow.theaters.columns%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0
A%20%20%20%20mo.md(r%22%22%22Okay%20so%20we%20can%20use%20%60pd.Series.mode%60%20on%20the%20%60theater_type%60%20column.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20booknow.theaters.theater_type.mode()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22How%20many%20*distinct%20theaters*%20are%20present%20in%20the%20dataset%20named%20%60cinePOS_booking%60%3F%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20cinepos.booking.columns%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Let's%20check%20if%20this%20has%20any%20%60NaN%60%20values.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20cinepos.booking.cine_theater_id.isna().sum()%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20cinepos.booking.cine_theater_id.nunique()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Combine%20the%20datasets%20booknow_booking%20and%20booknow_theaters.%20What%20is%20the%20shape%20of%20this%20dataset%3F%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20%23%20how%3D'inner'%20by%20default%0A%20%20%20%20%23%20on%3D'...'%20is%20the%20intersecting%20column%20by%20default%0A%20%20%20%20%23%20thus%2C%20on%3D'book_theater_id'%0A%20%20%20%20booknow.booking_theaters_inner%20%3D%20booknow.booking.merge(booknow.theaters)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20booknow.booking_theaters_inner.shape%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Based%20on%20the%20combined%20dataset%20obtained%20in%20the%20previous%20question%2C%20how%20many%20distinct%20areas%20are%20the%20theaters%20located%20in%3F%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(
booknow)%3A%0A%20%20%20%20booknow.booking_theaters_inner.theater_area.nunique()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Combine%20the%20datasets%20movie_theater_id_relation%2C%20cinePOS_booking%20and%20cinePOS_theaters.%20Enter%20the%20column%20names%20containing%20missing%20values%20as%20comma%20seperated%20values.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos%2C%20load_df)%3A%0A%20%20%20%20cinepos.id_relation%20%3D%20load_df('movie_theater_id_relation')%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20print(cinepos.id_relation.columns)%0A%20%20%20%20print(cinepos.booking.columns)%0A%20%20%20%20print(cinepos.theaters.columns)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cinepos)%3A%0A%20%20%20%20def%20_()%3A%0A%20%20%20%20%20%20%20%20merged%20%3D%20cinepos.id_relation.merge(cinepos.booking).merge(cinepos.theaters)%0A%20%20%20%20%20%20%20%20isna%20%3D%20merged.isna().sum()%0A%20%20%20%20%20%20%20%20print(isna)%0A%20%20%20%20%20%20%20%20print(%22missing%20value%20columns%3A%22%2C%20'%2C'.join(isna%5Bisna%20%3E%200%5D.index))%0A%20%20%20%20_()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Enter%20the%20date%20range%20spanned%20in%20the%20dataset%20named%20booknow_booking.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow%2C%20pd)%3A%0A%20%20%20%20booknow.booking.show_datetime%20%3D%20pd.to_datetime(booknow.booking.show_datetime)%0A%20%20%20%20booknow.booking.booking_datetime%20%3D%20pd.to_datetime(booknow.booking.booking_datetime)%0A%20%20%20%20return%0A%0A%0A%40app.function%0Adef%20iso_to_british_date(time)%3A%0A%20%20%20%20return%20time.strftime('%25d-%25m-%25Y')%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20iso_to_british_date(booknow.booking.show_datetime.dt.date.min())%2C%0A%20%20%20%20%20%20%20%20%22to%22%2C%0A%20%20%20%2
0%20%20%20%20iso_to_british_date(booknow.booking.show_datetime.dt.date.max())%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(booknow)%3A%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20iso_to_british_date(booknow.booking.booking_datetime.dt.date.min())%2C%0A%20%20%20%20%20%20%20%20%22to%22%2C%0A%20%20%20%20%20%20%20%20iso_to_british_date(booknow.booking.booking_datetime.dt.date.max())%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Fortunately%2C%20there%20are%20no%20inconsitencies%20between%20the%20range%20of%20%60show_datetime%60%20and%20%60booking_datetime%60.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Enter%20the%20date%20range%20spanned%20in%20the%20test%20dataset%20(sample_submission).%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(SimpleNamespace%2C%20load_df)%3A%0A%20%20%20%20submission%20%3D%20SimpleNamespace(%0A%20%20%20%20%20%20%20%20sample%3Dload_df('sample_submission')%0A%20%20%20%20)%0A%20%20%20%20return%20(submission%2C)%0A%0A%0A%40app.cell%0Adef%20_(submission)%3A%0A%20%20%20%20submission.sample.head()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22We%20have%20to%20split%20the%20IDs.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(pd%2C%20submission)%3A%0A%20%20%20%20submission.input%20%3D%20pd.DataFrame()%0A%20%20%20%20submission.input%5B%5B'ID'%2C%20'date'%5D%5D%20%3D%20submission.sample.ID.str.rsplit(%22_%22%2C%20n%3D1%2C%20expand%3DTrue)%0A%20%20%20%20submission.input.date%20%3D%20pd.to_datetime(submission.input.date)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(submission)%3A%0A%20%20%20%20submission.input.head()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Very%20well%2C%20let's%20find%20the%20range%20now.%22%22%22)%0A%20%20%20%20r
eturn%0A%0A%0A%40app.cell%0Adef%20_(submission)%3A%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20iso_to_british_date(submission.input.date.dt.date.min())%2C%0A%20%20%20%20%20%20%20%20%22to%22%2C%0A%20%20%20%20%20%20%20%20iso_to_british_date(submission.input.date.dt.date.max())%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%20%20%20%20return%20(mo%2C)%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A
</marimo-code>
<marimo-code-hash hidden="">5d2c741af210e89fafa5350ff5fe7601c907867057054d738de07ab3db6accec</marimo-code-hash>
</body>
</html>