-
Notifications
You must be signed in to change notification settings - Fork 19
/
22-2.1 ETL with bonobo
1 lines (1 loc) · 7.85 KB
/
22-2.1 ETL with bonobo
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"22-2.1 ETL with bonobo","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyN7cLqZ4ZOLUPcxGpHFJySO"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"7mRPZ8jDmgT2","executionInfo":{"status":"ok","timestamp":1605203891818,"user_tz":-60,"elapsed":1887,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["# ETL with bonobo\n","# author: Gressling, T\n","# license: MIT License # code: github.com/gressling/examples\n","# activity: single example # index: 22-1 "],"execution_count":1,"outputs":[]},{"cell_type":"code","metadata":{"id":"2a7pfctDmlvi"},"source":["# Setup: install bonobo into the kernel's own environment.\n","# %pip (not !pip) targets the running kernel; the version is pinned for reproducibility.\n","%pip install -q bonobo==0.6.4"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"6hXAHxIVmoMJ","executionInfo":{"status":"ok","timestamp":1605203911499,"user_tz":-60,"elapsed":1794,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["import bonobo"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"id":"AQIHyVqJmqcx","executionInfo":{"status":"ok","timestamp":1605203914915,"user_tz":-60,"elapsed":1890,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["def generate_data():\n","    \"\"\"Extract step: yield the four sample words, one at a time.\"\"\"\n","    yield 'Data'\n","    yield 'science'\n","    yield 'in'\n","    yield 'chemistry'"],"execution_count":4,"outputs":[]},{"cell_type":"code","metadata":{"id":"-qU-NKngmszR","executionInfo":{"status":"ok","timestamp":1605203918288,"user_tz":-60,"elapsed":1317,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}}},"source":["def uppercase(x: str) -> str:\n","    \"\"\"Transform step: return the incoming word in upper case.\"\"\"\n","    return x.upper()\n","\n","\n","def output(x: str) -> None:\n","    \"\"\"Load step: print the incoming word to stdout.\"\"\"\n","    print(x)"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"fKCG8cqomvnI","executionInfo":{"status":"ok","timestamp":1605203932378,"user_tz":-60,"elapsed":1438,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"eb04abb4-464e-4eb1-a1b3-faf8b9dcb05a","colab":{"base_uri":"https://localhost:8080/"}},"source":["# Main chain: generate_data -> uppercase -> output.\n","graph = bonobo.Graph(\n","    generate_data,\n","    uppercase,\n","    output,\n",")\n","# Second branch straight from generate_data to another output node, so each\n","# word is also printed unchanged (see the rendered graph and the run output).\n","graph.add_chain(output, _input=generate_data)"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["GraphRange(graph=<bonobo.structs.graphs.Graph object at 0x7f8c660026d8>, input=3, output=3)"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"6bxJeLJUm185","executionInfo":{"status":"ok","timestamp":1605203934990,"user_tz":-60,"elapsed":785,"user":{"displayName":"Dr. 
Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"7ae57b14-5c9d-48bf-f979-9fe0bc4e7295","colab":{"base_uri":"https://localhost:8080/","height":169}},"source":["graph"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div><?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n","<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n"," \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n","<!-- Generated by graphviz version 2.40.1 (20161225.0304)\n"," -->\n","<!-- Title: %3 Pages: 1 -->\n","<svg width=\"408pt\" height=\"98pt\"\n"," viewBox=\"0.00 0.00 408.17 98.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n","<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 94)\">\n","<title>%3</title>\n","<polygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-94 404.1744,-94 404.1744,4 -4,4\"/>\n","<!-- BEGIN -->\n","<g id=\"node1\" class=\"node\">\n","<title>BEGIN</title>\n","<ellipse fill=\"#000000\" stroke=\"#000000\" cx=\"1.8\" cy=\"-45\" rx=\"1.8\" ry=\"1.8\"/>\n","</g>\n","<!-- 0 -->\n","<g id=\"node2\" class=\"node\">\n","<title>0</title>\n","<ellipse fill=\"none\" stroke=\"#000000\" cx=\"101.3445\" cy=\"-45\" rx=\"61.99\" ry=\"18\"/>\n","<text text-anchor=\"middle\" x=\"101.3445\" y=\"-41.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">generate_data</text>\n","</g>\n","<!-- BEGIN->0 -->\n","<g id=\"edge1\" class=\"edge\">\n","<title>BEGIN->0</title>\n","<path fill=\"none\" stroke=\"#000000\" d=\"M3.8791,-45C7.9362,-45 17.5412,-45 29.2182,-45\"/>\n","<polygon fill=\"#000000\" stroke=\"#000000\" points=\"29.438,-48.5001 39.4379,-45 29.4379,-41.5001 29.438,-48.5001\"/>\n","</g>\n","<!-- 1 -->\n","<g id=\"node3\" class=\"node\">\n","<title>1</title>\n","<ellipse fill=\"none\" stroke=\"#000000\" cx=\"246.5348\" 
cy=\"-72\" rx=\"47.3916\" ry=\"18\"/>\n","<text text-anchor=\"middle\" x=\"246.5348\" y=\"-68.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">uppercase</text>\n","</g>\n","<!-- 0->1 -->\n","<g id=\"edge2\" class=\"edge\">\n","<title>0->1</title>\n","<path fill=\"none\" stroke=\"#000000\" d=\"M153.7616,-54.7476C166.7083,-57.1552 180.5852,-59.7358 193.5583,-62.1483\"/>\n","<polygon fill=\"#000000\" stroke=\"#000000\" points=\"193.2624,-65.6532 203.7338,-64.0406 194.5422,-58.7712 193.2624,-65.6532\"/>\n","</g>\n","<!-- 3 -->\n","<g id=\"node4\" class=\"node\">\n","<title>3</title>\n","<ellipse fill=\"none\" stroke=\"#000000\" cx=\"246.5348\" cy=\"-18\" rx=\"35.194\" ry=\"18\"/>\n","<text text-anchor=\"middle\" x=\"246.5348\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">output</text>\n","</g>\n","<!-- 0->3 -->\n","<g id=\"edge3\" class=\"edge\">\n","<title>0->3</title>\n","<path fill=\"none\" stroke=\"#000000\" d=\"M153.7616,-35.2524C170.0196,-32.229 187.7443,-28.9329 203.3043,-26.0393\"/>\n","<polygon fill=\"#000000\" stroke=\"#000000\" points=\"203.9882,-29.4722 213.1797,-24.2028 202.7083,-22.5902 203.9882,-29.4722\"/>\n","</g>\n","<!-- 2 -->\n","<g id=\"node5\" class=\"node\">\n","<title>2</title>\n","<ellipse fill=\"none\" stroke=\"#000000\" cx=\"365.0775\" cy=\"-72\" rx=\"35.194\" ry=\"18\"/>\n","<text text-anchor=\"middle\" x=\"365.0775\" y=\"-68.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\">output</text>\n","</g>\n","<!-- 1->2 -->\n","<g id=\"edge4\" class=\"edge\">\n","<title>1->2</title>\n","<path fill=\"none\" stroke=\"#000000\" d=\"M294.0676,-72C302.5853,-72 311.4398,-72 319.889,-72\"/>\n","<polygon fill=\"#000000\" stroke=\"#000000\" points=\"319.9444,-75.5001 329.9444,-72 319.9444,-68.5001 319.9444,-75.5001\"/>\n","</g>\n","</g>\n","</svg>\n","</div><pre><bonobo.structs.graphs.Graph object at 0x7f8c660026d8></pre>"],"text/plain":["<bonobo.structs.graphs.Graph at 
0x7f8c660026d8>"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"O4ZSE4WKm0YJ","executionInfo":{"status":"ok","timestamp":1605203938904,"user_tz":-60,"elapsed":895,"user":{"displayName":"Dr. Thorsten Gressling","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gjsy54zs1Pd2tw4fMBrz3a7FsYlzNHgwHWDqLywqA=s64","userId":"08619859847936335393"}},"outputId":"f538e987-1082-49a2-ee6a-c0adfa1ab860","colab":{"base_uri":"https://localhost:8080/"}},"source":["bonobo.run(graph)"],"execution_count":9,"outputs":[{"output_type":"stream","text":["DATA\n","SCIENCE\n","IN\n","CHEMISTRY\n","Data\n","science\n","in\n","chemistry\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["<bonobo.execution.contexts.graph.GraphExecutionContext at 0x7f8c66002978>"]},"metadata":{"tags":[]},"execution_count":9}]}]}