VBA Code for Extraction of WikiTable

Some times Wikipedia consists of interesting data in tabular format and we need it for further analysis of evaluation. Coping in excel through manually is time consuming, the vba code presented below can solve this issue. The red mark code can be changed to as per your need. yx denoted rows and column where data needs to extracted. url is the wikipedia url and tblnumber is the table number (start form 0 as first table).

Sub wikitable()
Dim oDom As Object: Set oDom = CreateObject("htmlFile")
Dim x As Long, y As Long
Dim oRow As Object, oCell As Object
Dim data
y = 1: x = 1
url = "https://en.wikipedia.org/wiki/List_of_mobile_network_operators"
tblnumber = 0

With CreateObject("msxml2.xmlhttp")
.Open "GET", url, False
.Send
oDom.body.innerHtml = .responseText
End With
With oDom.GetELementsbytagNAme("table")(tblnumber)
ReDim data(1 To .Rows.Length, 1 To .Rows(1).Cells.Length)
For Each oRow In .Rows
For Each oCell In oRow.Cells
data(x, y) = oCell.innerText
y = y + 1
Next oCell
y = 1
x = x + 1
Next oRow
End With
ActiveSheet.Cells(1, 1).Resize(UBound(data), UBound(data, 2)).Value = data
End Sub

Another approach using IE as here:


Sub Test()
   Dim ie As Object, i As Long, strText As String
   Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
   Dim tb As Object, bb As Object, tr As Object, td As Object
   Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
     Set wb = Excel.ActiveWorkbook
     Set ws = wb.ActiveSheet
     Set ie = CreateObject("InternetExplorer.Application")
     ie.Visible = True
      y = 1   'Column A in Excel
      z = 1   'Row 1 in Excel
     ie.navigate "http://", , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
     Do While ie.busy: DoEvents: Loop
     Do While ie.ReadyState <> 4: DoEvents: Loop
     Set doc = ie.document
     Set hTable = doc.GetElementsByTagName("table")
     For Each tb In hTable
        Set hBody = tb.GetElementsByTagName("tbody")
        For Each bb In hBody
            Set hTR = bb.GetElementsByTagName("tr")
            For Each tr In hTR
                 Set hTD = tr.GetElementsByTagName("td")
                 y = 1 ' Resets back to column A
                 For Each td In hTD
                   ws.Cells(z, y).Value = td.innertext
                   y = y + 1
                 Next td
                 DoEvents
                 z = z + 1
            Next tr
            Exit For
        Next bb
    Exit For
  Next tb
End Sub

Comments

Popular posts from this blog

Databases on the FDA Website

Employee Retention – A critical issue, why..and How to solve?

IPEXL - New Patent Search Tool