For a research project, we needed to extract data from a webpage and save it to disk before injecting it into our analytics pipeline.

Here is a way to accomplish it.

Browse to the Webpage

For demonstration, we will extract all companies listed on the MBA.org website:

register

Inject jQuery

The first step is to inject jQuery into the webpage if it is not already available. An easy way to do this is to use the jQuerify extension.

Get data from the DOM

Inspect the DOM and get the required data

// Build one {id, name} record per <option> of the company selection box.
// Both names stay `var` globals so they remain reachable from later
// console snippets (companies_arr is what gets saved afterwards).
var companies = $('select.lbxCompanySelection option');

// .map() visits every matched <option>; .get() unwraps the jQuery result
// into a plain array.
var companies_arr = companies.map(function(position, optionEl) {
     var option = $(optionEl);
     return {
          id: option.attr('value'),
          name: option.text().trim()
     };
}).get();

console.log(companies_arr);

Create the function for saving data

In DevTools, define the function that saves the data:

 // credit: https://techtalkbook.com/export-data-from-the-chrome-browser-console/
 // credit: https://www.freecodecamp.org/news/how-to-use-the-browser-console-to-scrape-and-save-data-in-a-file-with-javascript-b40f4ded87ef/

/**
 * Installs `console.save(data, filename)`, which downloads `data` as a file
 * from the browser console.
 *
 * @param {*} data - Payload to save. Objects and arrays are serialised as
 *     JSON with 4-space indentation; any other value is handed to the Blob
 *     as-is. `null` and `undefined` are rejected with a console error.
 * @param {string} [filename] - Download name; any falsy value falls back to
 *     'console.json'.
 */
(function(console) {
    // How long to keep the object URL alive after triggering the download.
    // NOTE(review): conservative guess; the browser only needs the URL until
    // it has started reading the Blob.
    var REVOKE_DELAY_MS = 1000;

    console.save = function(data, filename) {
        // Only a missing value counts as "no data": 0, false and '' are
        // legitimate payloads and must not be rejected by a truthiness test.
        if (data === undefined || data === null) {
            console.error('Console.save: No data');
            return;
        }

        var targetName = filename || 'console.json';
        var payload = (typeof data === 'object')
            ? JSON.stringify(data, undefined, 4)
            : data;

        var blob = new Blob([payload], {type: 'text/json'});
        var url = window.URL.createObjectURL(blob);
        var a = document.createElement('a');

        a.download = targetName;
        a.href = url;
        a.dataset.downloadurl = ['text/json', a.download, a.href].join(':');

        // The MouseEvent constructor replaces the deprecated
        // createEvent('MouseEvents') + initMouseEvent() pair; the event keeps
        // the same settings (bubbles, not cancelable, view = window).
        a.dispatchEvent(new MouseEvent('click', {
            bubbles: true,
            cancelable: false,
            view: window
        }));

        // Release the Blob once the download has been handed off; without
        // this every call leaves an object URL alive until the page unloads.
        setTimeout(function() {
            window.URL.revokeObjectURL(url);
        }, REVOKE_DELAY_MS);
    };
})(console);

Save data

console.save(companies_arr, 'companies.json');

Voila! There you have the required data in the ~/Downloads directory.

Display data in console

Bonus! Here are some cool ways to see the data

# credit: [bash - How to format a JSON string as a table using jq? - Stack Overflow](https://stackoverflow.com/questions/39139107/how-to-format-a-json-string-as-a-table-using-jq)

# id<TAB>name per company, built with string interpolation (first lines only).
jq -r '.[] | "\(.id)\t\(.name)"' ~/Downloads/companies.json | head

# The same two columns, letting @tsv join the fields with tabs.
jq -r '.[] | [.id, .name] | @tsv' ~/Downloads/companies.json | head

# Adds a header row and a hand-written underline row above the data.
jq -r '["ID","NAME"], ["--","------"], (.[] | [.id, .name]) | @tsv' ~/Downloads/companies.json | head

# Same table, with the underline generated from the length of each header.
jq -r '(["ID","NAME"] | (., map(length*"-"))), (.[] | [.id, .name]) | @tsv' ~/Downloads/companies.json | head

Now you are ready to run it through your analytics pipeline!