<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="https://script.spoken-tutorial.org/skins/common/feed.css?303"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
		<id>https://script.spoken-tutorial.org/index.php?action=history&amp;feed=atom&amp;title=Python-for-Automation%2FC3%2FWeb-Scraping%2FEnglish</id>
		<title>Python-for-Automation/C3/Web-Scraping/English - Revision history</title>
		<link rel="self" type="application/atom+xml" href="https://script.spoken-tutorial.org/index.php?action=history&amp;feed=atom&amp;title=Python-for-Automation%2FC3%2FWeb-Scraping%2FEnglish"/>
		<link rel="alternate" type="text/html" href="https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;action=history"/>
		<updated>2026-05-13T10:44:40Z</updated>
		<subtitle>Revision history for this page on the wiki</subtitle>
		<generator>MediaWiki 1.23.17</generator>

	<entry>
		<id>https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56697&amp;oldid=prev</id>
		<title>Madhurig at 07:05, 5 November 2024</title>
		<link rel="alternate" type="text/html" href="https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56697&amp;oldid=prev"/>
				<updated>2024-11-05T07:05:32Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;amp;diff=56697&amp;amp;oldid=56696&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>Madhurig</name></author>	</entry>

	<entry>
		<id>https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56696&amp;oldid=prev</id>
		<title>Madhurig at 06:50, 5 November 2024</title>
		<link rel="alternate" type="text/html" href="https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56696&amp;oldid=prev"/>
				<updated>2024-11-05T06:50:16Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;amp;diff=56696&amp;amp;oldid=56691&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>Madhurig</name></author>	</entry>

	<entry>
		<id>https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56691&amp;oldid=prev</id>
		<title>Nirmala Venkat: Created page with &quot; {| border=&quot;1&quot; |- || '''Visual Cue''' || '''Narration''' |- |- style=&quot;border:1pt solid #000000;padding:0.176cm;&quot; || Show Slide:  '''Welcome''' || Hello and welcome to the Spok...&quot;</title>
		<link rel="alternate" type="text/html" href="https://script.spoken-tutorial.org/index.php?title=Python-for-Automation/C3/Web-Scraping/English&amp;diff=56691&amp;oldid=prev"/>
				<updated>2024-10-15T10:00:19Z</updated>
		
		<summary type="html">&lt;p&gt;Created page with &amp;quot; {| border=&amp;quot;1&amp;quot; |- || &amp;#039;&amp;#039;&amp;#039;Visual Cue&amp;#039;&amp;#039;&amp;#039; || &amp;#039;&amp;#039;&amp;#039;Narration&amp;#039;&amp;#039;&amp;#039; |- |- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot; || Show Slide:  &amp;#039;&amp;#039;&amp;#039;Welcome&amp;#039;&amp;#039;&amp;#039; || Hello and welcome to the Spok...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&lt;br /&gt;
{| border=&amp;quot;1&amp;quot;&lt;br /&gt;
|-&lt;br /&gt;
|| '''Visual Cue'''&lt;br /&gt;
|| '''Narration'''&lt;br /&gt;
|-&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Welcome'''&lt;br /&gt;
|| Hello and welcome to the Spoken Tutorial on '''Web Scraping''' &lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Learning Objectives'''&lt;br /&gt;
|| In this tutorial, we will learn to &lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Scrape data from any website&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Extract it to an Excel file&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Perform basic data analysis&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Generate visualizations&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''System Requirements'''&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Ubuntu''' '''Linux OS 22.04'''&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Python 3.12.3'''&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|| To record this tutorial, I am using&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Ubuntu''' '''Linux OS version 22.04'''&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Python 3.12.3'''&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:'''Pre-requisites'''&lt;br /&gt;
&lt;br /&gt;
[https://www.spoken-tutorial.org/ https://www.spoken-tutorial.org]&lt;br /&gt;
|| To follow this tutorial &lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;You must have basic knowledge of using Linux Terminal and Python&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;For pre-requisite Linux and Python Tutorials, please visit this website&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Python libraries required for automation must be installed&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:'''Code Files'''&lt;br /&gt;
||&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;The files used in this tutorial are provided in the Code files link.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Please download and extract the files.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Make a copy and then use them while practicing.&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Web Scraping'''&lt;br /&gt;
|| '''Web Scraping''' is the automated process of extracting data from websites with software.&lt;br /&gt;
&lt;br /&gt;
We will automate extracting data and information from web pages and parsing '''HTML '''content.&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Web Scraping - Libraries'''&amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | To automate the process of extracting data from a website, we need:&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Requests '''library to fetch HTML content from a web page &amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''BeautifulSoup '''library to parse and extract information from the HTML content&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Matplotlib''' library to create static, animated, and interactive visualizations&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Pandas''' library to provide data structures and data analysis tools &amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Xlsxwriter''' library is used for creating and formatting Excel files&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Datetime '''library handles date operations like parsing strings into date objects&amp;lt;/div&amp;gt;&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Show Slide: &lt;br /&gt;
&lt;br /&gt;
'''Web Scraping - Example'''&lt;br /&gt;
|| For this tutorial, we will extract data from the Spoken Tutorial '''statistics '''webpage.&lt;br /&gt;
Data analysis is done with workshops conducted between 2022 and 2023 on certain '''software.'''&lt;br /&gt;
&lt;br /&gt;
Data such as State, City, Institution, Department, Organizer, Date and Participants are handled.&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Point to the '''webscraping.py''' in downloads folder&lt;br /&gt;
&lt;br /&gt;
Open the Text Editor with the source file&lt;br /&gt;
|| I have created the source file '''webscraping.py''' for demonstration.&lt;br /&gt;
&lt;br /&gt;
Now, we will go through the source code in the text editor. &lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Looking at the code&lt;br /&gt;
|| This source code will extract the necessary data, analyze it and plot graphs.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:'''import requests'''&lt;br /&gt;
&lt;br /&gt;
'''from bs4 import BeautifulSoup'''&lt;br /&gt;
&lt;br /&gt;
'''import pandas as pd'''&lt;br /&gt;
&lt;br /&gt;
'''from datetime import datetime'''&lt;br /&gt;
&lt;br /&gt;
'''import matplotlib.pyplot as plt'''&lt;br /&gt;
&lt;br /&gt;
'''from mpl_toolkits.mplot3d import Axes3D'''&lt;br /&gt;
|| First we need to import the necessary modules for '''web scraping''' in '''Python'''. &lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We fetch the '''HTML '''of the page with''' requests.get''' to the '''URL'''.&lt;br /&gt;
&lt;br /&gt;
Then, we parse it with '''BeautifulSoup '''to return a '''soup object''' for further analysis.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
&lt;br /&gt;
|| '''extract_table_data''' '''function '''extracts relevant data from an '''HTML '''table in the''' soup object'''.&lt;br /&gt;
&lt;br /&gt;
Then, we find the table with the provided class name in the '''HTML'''. &lt;br /&gt;
&lt;br /&gt;
An empty '''list '''is returned if no table is found.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
&lt;br /&gt;
|| We find all the '''rows '''of the table except the first header row.&lt;br /&gt;
&lt;br /&gt;
Then, an empty '''list '''is initialized to store the extracted data. &lt;br /&gt;
&lt;br /&gt;
For each '''row '''in the table, we extract all cells and strip the text content from each cell.&lt;br /&gt;
&lt;br /&gt;
Then, we store the values in a '''list'''.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We finally check if the '''FOSS '''type column matches any of the values in the '''foss filter.'''&lt;br /&gt;
&lt;br /&gt;
Then, we convert the '''date '''column to a '''datetime object.'''&lt;br /&gt;
&lt;br /&gt;
This is to verify if it falls within the specified time range.&lt;br /&gt;
&lt;br /&gt;
If both conditions are met, the '''row's '''data is added to the '''list'''.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| This function locates the''' pagination element''' in the '''HTML '''and extracts the page numbers from it.&lt;br /&gt;
&lt;br /&gt;
'''Pagination '''is the process of dividing content into discrete pages.&lt;br /&gt;
&lt;br /&gt;
If page numbers are found, it returns the last one as the total number of pages. &lt;br /&gt;
&lt;br /&gt;
If no '''pagination '''is found, it returns 1, assuming there's only one page. &lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| '''scrape_all_pages''' '''function '''scrapes data from all available pages.&lt;br /&gt;
&lt;br /&gt;
First, it fetches the initial page’s content and then determines the number of pages.&lt;br /&gt;
&lt;br /&gt;
Finally we extract the relevant data based on the '''filters'''.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| Now, we iterate over all remaining pages starting from page 2.&lt;br /&gt;
&lt;br /&gt;
First, we modify the '''url '''to request each specific page, fetch content and extract data.&lt;br /&gt;
&lt;br /&gt;
The data from each page is then appended to the overall '''dataset '''and returned.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| Next, we will define functions for''' data analysis and data visualization'''. &lt;br /&gt;
&lt;br /&gt;
'''piechart_visualization''' '''function '''generates a pie chart showing the '''FOSS categories'''.&lt;br /&gt;
&lt;br /&gt;
Additionally, the '''FOSS '''counts are saved to an '''Excel sheet '''with the specified sheet name.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| '''barchart_visualization''' '''function '''creates a '''bar chart''' showing the number of '''workshops '''per city.&lt;br /&gt;
&lt;br /&gt;
It then writes the data to an '''excel sheet''' with the specified sheet name.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
&lt;br /&gt;
|| We now define a '''function '''to filter the '''data frame''' to find '''workshops '''held at a specified college.&lt;br /&gt;
&lt;br /&gt;
It then counts and retrieves unique '''FOSS types''', departments, and organizers.&lt;br /&gt;
&lt;br /&gt;
If no data is found for the given college, it returns None.&lt;br /&gt;
&lt;br /&gt;
Otherwise, it returns the unique '''FOSS categories''', departments, and organizers.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We first convert the '''Participants '''column to numeric values, filling any invalid entries with 0.&lt;br /&gt;
&lt;br /&gt;
Then we group the '''data '''by city and '''FOSS type''' and calculate the total number of participants.&lt;br /&gt;
&lt;br /&gt;
Unique city names and''' FOSS types''' are extracted, and numeric mappings are created to represent them.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We can now create a '''3D bar chart''' visualizing the participants in workshops by city and '''FOSS type'''.&lt;br /&gt;
&lt;br /&gt;
The axes are labeled, and ticks are mapped to cities and '''FOSS '''values with a title.&lt;br /&gt;
&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We define this '''function to '''ensure that an excel sheet name does not exceed the 31 character limit.&lt;br /&gt;
&lt;br /&gt;
If the name is longer than 31 characters, it '''truncates '''the name to 28 characters with '''ellipsis'''.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We now define the base '''url '''for scraping the data and set filters to focus on specific '''FOSS types.'''&lt;br /&gt;
&lt;br /&gt;
We also set a '''date range''' from January 1 2022 to January 1 2023.&lt;br /&gt;
&lt;br /&gt;
We then convert the start and end dates from string format to '''datetime objects''' for comparison.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight: &lt;br /&gt;
|| We now scrape all pages of data using the defined filters and store it in a '''DataFrame. '''&lt;br /&gt;
&lt;br /&gt;
The '''DataFrame '''is then created with specified '''columns''', and duplicates are removed.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| We can now write the '''DataFrame '''data to an''' Excel file''' and generate visualizations.&lt;br /&gt;
&lt;br /&gt;
We also analyze and save '''workshop '''data for a specific college if available.&lt;br /&gt;
|- style=&amp;quot;border:1pt solid #000000;padding:0.176cm;&amp;quot;&lt;br /&gt;
|| Highlight:&lt;br /&gt;
|| A '''3D visualization''' is generated from the '''DataFrame'''. &lt;br /&gt;
&lt;br /&gt;
The data is saved and a confirmation message is printed.&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Save the Code in the '''Downloads '''Folder&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Save the code as '''webscraping.py '''in the '''Downloads '''folder.&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Open the terminal ('''Ctrl + Alt + T''')&lt;br /&gt;
&lt;br /&gt;
Start Virtual Environment&lt;br /&gt;
&lt;br /&gt;
Type&lt;br /&gt;
&lt;br /&gt;
'''&amp;gt; source Automation/bin/activate'''&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Open the '''terminal''' by pressing '''Control + Alt + T '''keys simultaneously.&lt;br /&gt;
&lt;br /&gt;
We will open the virtual environment we created for the '''Automation''' series.&lt;br /&gt;
&lt;br /&gt;
Type '''source space Automation forward slash bin forward slash activate.'''&lt;br /&gt;
&lt;br /&gt;
Then press enter.&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Running the Code&lt;br /&gt;
&lt;br /&gt;
Type &lt;br /&gt;
&lt;br /&gt;
'''&amp;gt; &amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;cd Downloads&amp;lt;/span&amp;gt;'''&lt;br /&gt;
&lt;br /&gt;
'''&amp;gt; python3 webscraping.py'''&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;Now type, &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''cd Downloads'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;color:#252525;&amp;quot;&amp;gt;Then type &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;color:#252525;&amp;quot;&amp;gt;'''python3 webscraping.py'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; and press &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;color:#252525;&amp;quot;&amp;gt;'''Enter'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Observing the graphs: &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Pie Chart'''&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;As soon as we execute the code, &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''matplotlib '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;will display the graphs.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;The &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''pie chart'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt; displays the distribution of &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''workshops '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;by &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''FOSS category'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;This shows the proportion of each &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''FOSS type'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt; as a percentage of the total &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''workshops'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Close the window to see the next graph.&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Observing the graphs: &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Bar Chart'''&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;The next &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''graph '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;we get is a &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''Bar chart'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;. &amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;This shows the number of &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''workshops '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;conducted in each city. &amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Close the window to go to the next graph.&lt;br /&gt;
|-&lt;br /&gt;
|| Observing the graphs: &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''3D Bar Chart'''&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|| &amp;lt;div style=&amp;quot;color:#252525;&amp;quot;&amp;gt;Finally, we see the 3D bar chart.&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;This graph displays cities, &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''FOSS types'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; and participant counts on the three axes.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;div style=&amp;quot;color:#252525;&amp;quot;&amp;gt;Close the window.&amp;lt;/div&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| &lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;Let us check the data in the &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''excel sheet'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;.&amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Navigating to Downloads&lt;br /&gt;
&lt;br /&gt;
'''Files App &amp;gt; Downloads &amp;gt; st_data.xlsx'''&lt;br /&gt;
|| Go to the '''Downloads folder '''and double click to open the&amp;lt;span style=&amp;quot;color:#ff0000;&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;'''st_data.xlsx''' file.&lt;br /&gt;
|-&lt;br /&gt;
|| Observing the Excel sheet&lt;br /&gt;
|| We can see in the bottom left corner that we have created four sheets.&lt;br /&gt;
|-&lt;br /&gt;
|| Sheet 1 - Workshops Data&lt;br /&gt;
&lt;br /&gt;
Zoom and show the data&lt;br /&gt;
&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;The first &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''sheet '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;has the &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''Raw data'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; that we extracted from the &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''Spoken Tutorial '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;website.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;We can see the 10 &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''columns '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;here which contain all the data the website had.&amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Sheet 2 - FOSS Visualization&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;In the second sheet named &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''FOSS Visualization'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;, the count of workshops for each FOSS category is shown.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;This is the data of the &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''pie chart'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; we have seen earlier. &amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Sheet 3 - City Visualization&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;The third sheet &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;'''City Visualization'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;shows the number of workshops conducted per city.&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;This is the data of the &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''bar graph '''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;we have seen earlier.&amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Sheet 4 - Shri Phanishwar Nath Renu En...&lt;br /&gt;
|| &amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;The last sheet shows the unique data - &amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;'''FOSS type'''&amp;lt;/span&amp;gt;&amp;lt;span style=&amp;quot;background-color:#ffffff;color:#252525;&amp;quot;&amp;gt;, organizers, workshop dates and participants.&amp;lt;/span&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
|| Closing the virtual environment&lt;br /&gt;
Type &lt;br /&gt;
&lt;br /&gt;
'''&amp;gt; deactivate'''&lt;br /&gt;
|| Switch back to the terminal to close the virtual environment.&lt;br /&gt;
&lt;br /&gt;
Type '''deactivate'''.&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Applications of Web Scraping'''&amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;&lt;br /&gt;
|| '''Web Scraping''' has lots of applications across various fields.&lt;br /&gt;
&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Price Monitoring''' - E-commerce websites scrape their &amp;lt;span style=&amp;quot;background-color:#ffffff;&amp;quot;&amp;gt;Competitor&amp;lt;/span&amp;gt; websites.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;They monitor their prices and adjust theirs accordingly.&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Applications of Web Scraping'''&amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;&lt;br /&gt;
||&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Academic Research''' - Researchers can scrape data from academic journals and websites.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;They collect information for studies and research.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;'''Financial Data Analysis''' - Analysts use web scraping to collect data from financial websites.&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;They analyze stock prices, market trends etc.&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Summary'''&lt;br /&gt;
|| This brings us to the end of this tutorial. Let us summarise.&lt;br /&gt;
&lt;br /&gt;
In this tutorial, we have learnt to&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Extract data from websites&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Save data to an Excel file&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Perform basic data analysis&amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Generate visualizations&amp;lt;/div&amp;gt;&lt;br /&gt;
|-&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:1pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Assignment'''&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | As an assignment, please do the following:&lt;br /&gt;
&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Extract the '''workshop '''data using different '''foss filters''', start and end date. &amp;lt;/div&amp;gt;&lt;br /&gt;
* &amp;lt;div style=&amp;quot;margin-left:1.27cm;margin-right:0cm;&amp;quot;&amp;gt;Write the data to an '''Excel sheet'''.&amp;lt;/div&amp;gt;&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''About the Spoken Tutorial Project'''&lt;br /&gt;
| style=&amp;quot;border-top:1pt solid #000000;border-bottom:0.5pt solid #000000;border-left:0.5pt solid #000000;border-right:0.5pt solid #000000;padding-top:0cm;padding-bottom:0cm;padding-left:0.092cm;padding-right:0.191cm;&amp;quot; | The video at the following link summarises the '''Spoken Tutorial Project.''' Please download and watch it.&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:&lt;br /&gt;
&lt;br /&gt;
'''Spoken Tutorial Workshops'''&lt;br /&gt;
|| The '''Spoken Tutorial Project''' team conducts workshops and gives certificates.&lt;br /&gt;
&lt;br /&gt;
For more details, please write to us.&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Answers for THIS Spoken Tutorial'''&lt;br /&gt;
|| Please post your timed queries in this forum.&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide: &lt;br /&gt;
&lt;br /&gt;
'''FOSSEE Forum'''&lt;br /&gt;
|| For any general or technical questions on '''Python for Automation''', visit the '''FOSSEE forum''' and post your question.&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Acknowledgement'''&lt;br /&gt;
|| The '''Spoken Tutorial Project''' was established by the '''Ministry of Education, Government of India.'''&lt;br /&gt;
|-&lt;br /&gt;
|| Show Slide:'''Thank You'''&lt;br /&gt;
|| This is '''Sai''' '''Sathwik''', a FOSSEE Semester Long Intern 2024, IIT Bombay signing off.&lt;br /&gt;
&lt;br /&gt;
Thanks for joining.&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Nirmala Venkat</name></author>	</entry>

	</feed>