# Scraper.ps1
# (web-page navigation text and the line-number gutter from the source listing were removed here)
#Requires -Version 6
#Requires -Modules PowerHTML
Import-Module PowerHTML
Import-Module PSSalesForceLogin
#Import-Module ../PSSalesForceLogin/PSSalesForceLogin.psd1 -Force
<#
# List of plugins to download (plugin name - minimum support level)
# Last updated: 31 July 2023
Advanced Alerting - Platinum Only
Advanced Clustering - Platinum Only
ASTM E1381 Transmission Mode - Gold or above
ASTM E1394 Data Type - Gold or above
Channel History - Silver or above
Cures Certification Support - Gold or above
Email Reader Connector - Gold or above
Enhancement Bundle - Gold and Platinum
FHIR Connector (R4) - Gold or above
Health Data Hub Connector - Platinum Only
Interoperability Connector Suite - Gold or above
License Manager - Silver or above
LDAP Authentication - Gold or above
Message Generator - Silver or above
Mirth Results Connector - Platinum Only
Multi-Factor Authentication - Gold or above
Role-Based Access Control (User Authorization) - Gold or above
Serial Connector - Gold or above
SSL Manager - Silver or above
#>
# Names of the plugins to fetch; leaving the list empty requests every plugin
$PluginNames = @()
#$PluginNames = @('LDAP Authentication','Role-Based Access Control (User Authorization)', 'SSL Manager', 'Multi-Factor Authentication', 'FHIR Connector (R4)')

# Version of the plugins to fetch
$PluginVersion = '4.4'

# Support level of the account; plugins above this level are filtered out
# up front instead of failing with access errors
$UserSupportLevel = [SupportLevel]::GOLD

# Also fetch the attachments (e.g. user guide) listed on each plugin page?
$IncludeAttachments = $true

$ErrorActionPreference = 'Stop'

# Hard-coded UUID of the login item within the 1Password account
$1PASS_UUID = 'j5m7piroikq3dznzojyjmodyja'

# Site endpoints, all derived from the community base URL
$BaseUrl = 'https://www.community.nextgen.com'
$LoginUrl = "${BaseUrl}/apex/SuccessCommunityLogin"
$PluginListUrl = "${BaseUrl}/optimization/articles/Hot_Topic/Mirth-Plug-In-Central"
# Support tiers, ordered so that a higher numeric value means a higher tier.
# The script compares these values with -ge to decide which plugins the
# configured account may download.
enum SupportLevel {
    SILVER   = 1
    GOLD     = 2
    PLATINUM = 3
}
<#
.SYNOPSIS
Obtain credential via 1password-cli integration
.DESCRIPTION
Signs in with the `op` command line tool, reads the "username" and "password"
fields of the requested item as JSON and converts them into a PSCredential.
The CLI session is signed out again even when reading the item fails.
.PARAMETER UUID
UUID of the 1Password item
.OUTPUTS
System.Management.Automation.PSCredential
#>
function Get-1PassCredential {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string]
        $UUID
    )

    Begin {
        Write-Verbose "$($MyInvocation.MyCommand.Name) BEGIN"
    }
    Process {
        # `op signin` prints a statement that has to be evaluated in this session
        Invoke-Expression $(op signin)
        try {
            $json = op item get $UUID --fields "username,password" --format json | ConvertFrom-Json
        }
        finally {
            # always end the CLI session, including when the lookup above throws
            op signout
        }

        # Two field objects are expected (username first, password second).
        # NOTE(review): assumes `op` returns the fields in the order requested - confirm.
        if ($null -eq $json -or $json.Count -lt 2) {
            throw "1Password item '$UUID' did not return both the username and password fields"
        }

        # -Force is required together with -AsPlainText before PowerShell 7
        # and is accepted (ignored) on later versions
        $SecurePassword = ConvertTo-SecureString $json[1].value -AsPlainText -Force
        New-Object System.Management.Automation.PSCredential ($json[0].value, $SecurePassword)
    }
    End {
        Write-Verbose "$($MyInvocation.MyCommand.Name) END"
    }
}
<#
.SYNOPSIS
List the plugins shown on the plugin overview page.
.DESCRIPTION
Requests $PluginListUrl with the given web session, locates the table rows that
contain a link to an article and emits one object per row with the members
Name, Link, SupportLevel and SupportLevelText.
Emits nothing (and writes a warning) when no matching rows are found.
.PARAMETER session
Web session used for the request.
#>
function Select-Plugins {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [Microsoft.PowerShell.Commands.WebRequestSession]
        $session
    )

    Begin {
        Write-Verbose "$($MyInvocation.MyCommand.Name) BEGIN"
    }
    Process {
        Write-Debug "Invoking $PluginListUrl"
        $WebContent = Invoke-WebRequest -Uri $PluginListUrl -WebSession $session | ConvertFrom-Html

        # find the links where the href contains 'articles' as the others link to the Support Levels
        #$links = $WebContent.SelectNodes("//div[@class='pbBody']//tbody//td//span//a[contains(@href, 'articles')]")
        #https://stackoverflow.com/questions/3920957/xpath-query-with-descendant-and-descendant-text-predicates
        $PluginRows = $WebContent.SelectNodes("//div[@class='sfdc_richtext']//tbody//td//span//a[contains(@href, 'articles')]/ancestor::*[self::tr][1]")
        Write-Debug "Found $($PluginRows.Count) plugins"

        # SelectNodes yields $null (not an empty collection) when nothing matches;
        # piping $null onward would process one bogus "row"
        if ($null -eq $PluginRows) {
            Write-Warning "No plugin rows found at $PluginListUrl"
            return
        }

        foreach ($Row in $PluginRows) {
            $SupportLevelCell = $Row.SelectSingleNode("td[2]")
            $SupportLevelText = if ($null -eq $SupportLevelCell) { '' } else { $SupportLevelCell.InnerText }

            #the first word defines the minimum support level
            #(trim first so leading whitespace does not produce an empty first word)
            $FirstWord = ($SupportLevelText.Trim() -split '\s+')[0].ToUpper()
            $SupportLevel = if ($FirstWord) { $FirstWord -as [SupportLevel] } else { $null }
            if ($null -eq $SupportLevel) {
                Write-Error "Unable to determine Support Level for '$SupportLevelText'"
            }

            $Link = $Row.SelectSingleNode("td[1]//span//a[contains(@href, 'articles')]")
            [PSCustomObject]@{
                Name             = $Link.InnerText.Trim()
                Link             = $Link
                SupportLevel     = $SupportLevel
                SupportLevelText = $SupportLevelText
            }
        }
    }
    End {
        Write-Verbose "$($MyInvocation.MyCommand.Name) END"
    }
}
<#
.SYNOPSIS
Download one plugin, and optionally its attachments, from its article page.
.DESCRIPTION
For each incoming plugin object the article page is requested, the download
link whose last word starts with $PluginVersion is followed through the
intermediate pages, and the final file is saved to the current directory.
Plugins without a usable link are skipped with a warning; attachments are only
processed for plugins whose article page was actually loaded.
.PARAMETER PluginInfo
Plugin object providing the Name and Link members that are read here.
.PARAMETER session
Web session used for the requests against the community site.
.PARAMETER IncludeAttachments
Switch to include attachments along with the plugin.
#>
function Read-PluginPage {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory, ValueFromPipeline)]
        [PSCustomObject]
        $PluginInfo,

        [Parameter(Mandatory)]
        [Microsoft.PowerShell.Commands.WebRequestSession]
        $session,

        # switch to include attachments along with the plugin
        [switch]
        $IncludeAttachments
    )

    Begin {
        Write-Verbose "$($MyInvocation.MyCommand.Name) BEGIN"
    }
    Process {
        $PluginName = $PluginInfo.Name
        # Variables assigned in Process survive from one pipeline item to the next.
        # Clear the page so a skipped plugin can never reuse the previous plugin's
        # page when its attachments are collected below.
        $WebContent = $null

        Write-Debug "Processing '$PluginName', `$IncludeAttachments=$IncludeAttachments"
        $href = $PluginInfo.Link.GetAttributeValue('href', 'missing-href-value-1')

        if ($href -eq 'missing-href-value-1') {
            Write-Warning "Could not find href value for $PluginName - you likely don't have the correct support level, skipping"
        }
        #noticed this wall with 'Mirth Results Connector' and 'Health Data Hub Connector'
        #check for nextgenhealthcare.lightning.force.com which redirects to salesforce.com
        elseif ($href -match 'lightning.force.com') {
            Write-Warning "Unable to download plugin $PluginName as it requires a salesforce.com account, skipping"
        }
        else {
            $href = $BaseUrl + $href
            Write-Debug "Invoking plugin GET `$href=$href"
            $WebContent = Invoke-WebRequest -Uri $href -WebSession $session | ConvertFrom-Html

            $candidateLinks = $WebContent.SelectNodes("(//div[@class='pbSubsection'])[6]//table[@class='htmlDetailElementTable']//td//a")
            #example text: "Advanced Alerting Plug-in 4.2"
            #example text: "Enhancement Bundle Plug-in 4.4.0"
            #$pluginDownloadLinks = @($pluginDownloadLinks | Where-Object { $_.InnerText.Trim().EndsWith($PluginVersion) })
            # SelectNodes yields $null when nothing matches; the null check keeps the
            # filter from calling a method on that $null
            $pluginDownloadLinks = @($candidateLinks | Where-Object {
                    $null -ne $_ -and $_.InnerText.Trim().Split(" ")[-1].StartsWith($PluginVersion)
                })
            Write-Verbose "Found $($pluginDownloadLinks.Count) links matching version '$PluginVersion'"

            if ($pluginDownloadLinks.Count -eq 0) {
                Write-Warning "Failed to find version '$PluginVersion' of $PluginName"
            }
            elseif ($pluginDownloadLinks.Count -gt 1) {
                Write-Error "Found $($pluginDownloadLinks.Count) download links for version '$PluginVersion' of $PluginName, expected 1, skipping"
            }
            else {
                $NameAndVersion = $pluginDownloadLinks[0].InnerText.Trim()
                #ex. https://www.community.nextgen.com/apex/ResourceRepository?fileId=a9o4y000000YIdT
                $downloadUrl = $pluginDownloadLinks[0].GetAttributeValue('href', 'missing-href-value-2')
                Write-Debug "Invoking plugin '$NameAndVersion' GET $downloadUrl"
                $downloadResponse = Invoke-WebRequest -Uri $downloadUrl -WebSession $session

                #parse window.location.href from javascript in $downloadResponse.Content
                #ex. /DownloadSuccess?fileId=a9o4y000000YId9
                Write-Debug "Parsing javascript"
                $extractedHref = $BaseUrl + (Get-SFHrefFromJavascript $downloadResponse.Content ";")
                Write-Debug "Invoking GET $extractedHref"
                # call the href that was in the javascript
                $secondResponse = Invoke-WebRequest -Uri $extractedHref -WebSession $session | ConvertFrom-Html

                #if the plugin is at Support Level "Platinum Only", of which I don't have,
                #then the link will send us to a forbidden page that will fail parsing for the hidden "a" tag below.
                $hiddenLink = $secondResponse.SelectSingleNode("//a[@class='hidden']")
                if ($null -eq $hiddenLink) {
                    Write-Warning "Failed to find download link - you likely don't have the correct support level, skipping"
                }
                else {
                    # extract final HREF
                    # ex. https://<bucket>.s3.us-east-2.amazonaws.com/a9o4y000000YId9AAG/ldap-3.12.0.b1752.zip?X-Amz-Algorithm=...&X-Amz-Signature=...
                    $extractedHref = $hiddenLink.GetAttributeValue('href', 'missing-href-value-3')
                    Write-Debug "Found `$hiddenLink with `$extractedHref=$extractedHref"
                    Write-Debug "Decoding `$extractedHref"
                    # decode this or else the AWS call will fail
                    [uri]$decodedHref = [System.Web.HttpUtility]::HtmlDecode($extractedHref)
                    # extract filename
                    $filename = $decodedHref.Segments[-1]
                    Write-Debug "Downloading plugin '$filename' from $($decodedHref.AbsoluteUri)"
                    # this final request is made without the web session
                    Invoke-WebRequest -Uri $decodedHref.AbsoluteUri -OutFile $filename
                }
            }
        }

        if ($IncludeAttachments) {
            if ($null -eq $WebContent) {
                # the plugin was skipped before its page was requested, so there is
                # nothing to read attachments from
                Write-Debug "No plugin page loaded for '$PluginName', skipping attachments"
            }
            else {
                # download all attachments
                $attachments = $WebContent.SelectNodes("(//div[@class='pbSubsection'])[7]//table[@class='detailList']//a")
                foreach ($attachment in $attachments) {
                    $filename = $attachment.InnerText.Trim()
                    if ($filename -eq '') {
                        Write-Warning "Attachment filename is empty, skipping"
                    }
                    else {
                        if (-not $filename.EndsWith(".pdf")) {
                            Write-Verbose "Appending .pdf to filename"
                            $filename += ".pdf"
                        }
                        Write-Verbose "Downloading attachment '$filename'"
                        $href = $BaseUrl + $attachment.GetAttributeValue('href', 'missing-attachment-href-value')
                        Write-Debug "Invoking GET `$filename=$filename, `$href=$href"
                        Invoke-WebRequest -Uri $href -WebSession $session -OutFile $filename
                    }
                }
            }
        }
    }
    End {
        Write-Verbose "$($MyInvocation.MyCommand.Name) END"
    }
}
<#
.SYNOPSIS
Log in, list the plugins, filter them and download the selected ones.
.DESCRIPTION
Reads the script-level settings $1PASS_UUID, $LoginUrl, $UserSupportLevel,
$PluginNames and $IncludeAttachments. While it runs, Invoke-WebRequest is
silenced through $PSDefaultParameterValues and progress bars are disabled.
The previous default parameter values are put back when the function ends,
including when it ends because of an error.
#>
function Start-Scrape {
    [CmdletBinding()]
    param ()

    Write-Verbose "$($MyInvocation.MyCommand.Name) BEGIN"

    # quiet this chatty function: remember what was configured for these keys
    # (if anything) and force both to $False for the duration of the scrape
    $QuietKeys = 'Invoke-WebRequest:Debug', 'Invoke-WebRequest:Verbose'
    $SavedDefaults = @{}
    foreach ($Key in $QuietKeys) {
        if ($PSDefaultParameterValues.ContainsKey($Key)) {
            $SavedDefaults[$Key] = $PSDefaultParameterValues[$Key]
        }
        $PSDefaultParameterValues[$Key] = $False
    }

    # don't show progress bars; this assignment is local to the function,
    # so it needs no explicit restore
    $ProgressPreference = 'SilentlyContinue'

    try {
        # this will hold our cookies and be used in (most) web requests
        $session = Get-1PassCredential $1PASS_UUID | Invoke-SFLogin $LoginUrl

        # wrap in @() so the .Where() call below also works when nothing is returned
        $AllPlugins = @(Select-Plugins $session)

        Write-Debug "Grouping by SupportLevel=$UserSupportLevel"
        $AcceptedPlugins, $ExcludedPlugins = $AllPlugins.Where({
                #Write-Debug "Comparing $UserSupportLevel -ge $SupportLevel"
                $UserSupportLevel -ge $_.SupportLevel
            }, 'Split')
        Write-Debug ("SupportLevel accepted={0}, filtered={1}. Filtered names: {2}" -f $AcceptedPlugins.Count, $ExcludedPlugins.Count, ($ExcludedPlugins.Name -join ", "))

        Write-Debug "Filtering plugins to those in `$PluginNames"
        #Limit plugins to those listed by the user.
        #User can provide an empty list indicating all plugins.
        if ($PluginNames.Count -eq 0) {
            Write-Debug "`$PluginNames is empty, using all accepted plugins"
        }
        else {
            $AcceptedPlugins = @($AcceptedPlugins | Where-Object -FilterScript { $PluginNames -contains $_.Name })
        }

        $AcceptedPlugins | Read-PluginPage -session $session -IncludeAttachments:$IncludeAttachments
    }
    finally {
        # Restore the dictionary in place. Assigning a new value to
        # $PSDefaultParameterValues here would only create a function-local
        # variable and leave the mutated dictionary untouched.
        foreach ($Key in $QuietKeys) {
            if ($SavedDefaults.ContainsKey($Key)) {
                $PSDefaultParameterValues[$Key] = $SavedDefaults[$Key]
            }
            else {
                $PSDefaultParameterValues.Remove($Key)
            }
        }
        Write-Verbose "$($MyInvocation.MyCommand.Name) END"
    }
}
# Script entry point: run the scrape with verbose and debug messages enabled
Start-Scrape -Verbose:$true -Debug:$true