1+ "use strict" ;
2+
// Flag this module as a transpiled ES module; the `_interopRequireDefault`
// helper in this file returns such modules unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", {
  value: true
});
// `open` is a function declaration, so it is hoisted and can be exported
// before its definition appears.
exports.open = open;
// Pre-declare the remaining export names as undefined; each one is assigned
// its real value right after the corresponding `const` is defined below.
exports.isDataset = exports.isUrl = exports.parseDatasetIdentifier = exports.parsePath = exports.KNOWN_TABULAR_FORMAT = exports.PARSE_DATABASE = exports.DEFAULT_ENCODING = void 0;
8+
9+ var _fs = _interopRequireDefault ( require ( "fs" ) ) ;
10+
11+ var _path = _interopRequireDefault ( require ( "path" ) ) ;
12+
13+ var _url = _interopRequireDefault ( require ( "url" ) ) ;
14+
15+ var _nodeFetch = _interopRequireDefault ( require ( "node-fetch" ) ) ;
16+
17+ var _lodash = require ( "lodash" ) ;
18+
19+ var _mimeTypes = _interopRequireDefault ( require ( "mime-types" ) ) ;
20+
21+ var _csv = require ( "./parser/csv" ) ;
22+
23+ var _xlsx = require ( "./parser/xlsx" ) ;
24+
25+ var _index = require ( "./browser-utils/index" ) ;
26+
27+ var _fileInterface = require ( "./file-interface" ) ;
28+
29+ var _fileLocal = require ( "./file-local" ) ;
30+
31+ var _fileRemote = require ( "./file-remote" ) ;
32+
33+ var _fileInline = require ( "./file-inline" ) ;
34+
/**
 * Normalise a required module so its default export is reachable via `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`;
 *   otherwise a new object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return { default: obj };
}
36+
// Encoding name exported for use by other modules (consumers are not visible here).
const DEFAULT_ENCODING = 'utf-8';

// Lookup table from a format key to the parser function used for it.
// csv/tsv share the CSV parser; xlsx/xls share the XLSX parser.
const PARSE_DATABASE = {
  csv: _csv.csvParser,
  tsv: _csv.csvParser,
  xlsx: _xlsx.xlsxParser,
  xls: _xlsx.xlsxParser
};

// Format names treated as tabular.
// NOTE(review): 'dsv' is listed here but has no entry in PARSE_DATABASE — confirm intended.
const KNOWN_TABULAR_FORMAT = ['csv', 'tsv', 'dsv'];

exports.DEFAULT_ENCODING = DEFAULT_ENCODING;
exports.PARSE_DATABASE = PARSE_DATABASE;
exports.KNOWN_TABULAR_FORMAT = KNOWN_TABULAR_FORMAT;
48+
/**
 * Build a file object from a browser file, a descriptor object, or a path/URL string.
 *
 * Dispatch order:
 *   1. Browser file (per `isFileFromBrowser`)  -> `FileInterface`
 *   2. Plain-object descriptor with truthy `data` -> `FileInline`
 *   3. Descriptor with `path`, or a string: metadata is derived with `parsePath`;
 *      the result is `FileRemote` when the path or `basePath` is a URL,
 *      otherwise `FileLocal`.
 *
 * @param {string|Object} pathOrDescriptor - Path/URL string, descriptor object,
 *   or browser file object.
 * @param {Object} [options]
 * @param {string} [options.basePath] - Base path or base URL the path is relative to.
 * @param {string} [options.format] - Format to use instead of the one inferred
 *   from the file extension. Properties set explicitly on a descriptor still win.
 * @returns {Object} A `FileInterface`, `FileInline`, `FileRemote` or `FileLocal` instance.
 * @throws {TypeError} When `pathOrDescriptor` is neither a browser file, a plain
 *   object, nor a string.
 */
function open(pathOrDescriptor, { basePath, format } = {}) {
  if ((0, _index.isFileFromBrowser)(pathOrDescriptor)) {
    return new _fileInterface.FileInterface(pathOrDescriptor);
  }

  let descriptor = null;

  if ((0, _lodash.isPlainObject)(pathOrDescriptor)) {
    // Work on a deep copy so the caller's descriptor is never mutated.
    descriptor = (0, _lodash.cloneDeep)(pathOrDescriptor);

    if (descriptor.data) {
      return new _fileInline.FileInline(descriptor, { basePath });
    }

    if (descriptor.path) {
      // Forward `format` so the option is honoured for descriptor input too;
      // values already present on the descriptor override the parsed defaults.
      descriptor = Object.assign(parsePath(descriptor.path, basePath, format), descriptor);
    }
  } else if ((0, _lodash.isString)(pathOrDescriptor)) {
    descriptor = parsePath(pathOrDescriptor, basePath, format);
  } else {
    throw new TypeError(`Cannot create File from ${pathOrDescriptor}`);
  }

  const isRemote = descriptor.pathType === 'remote' || isUrl(basePath);

  if (isRemote) {
    return new _fileRemote.FileRemote(descriptor, { basePath });
  }

  return new _fileLocal.FileLocal(descriptor, { basePath });
}
87+
/**
 * Derive a file descriptor from a local path or URL.
 *
 * @param {string} path_ - Local path or URL of the file.
 * @param {?string} [basePath=null] - Base path; when it is a URL the file is
 *   treated as remote even if `path_` itself is relative.
 * @param {?string} [format=null] - Explicit format. When omitted, a URL query
 *   containing `format=csv` selects 'csv'; otherwise the lower-cased file
 *   extension (without the dot) is used.
 * @returns {{path: string, pathType: string, name: string, format: string, mediatype: (string|undefined)}}
 *   `pathType` is 'remote' or 'local'; `name` is a slug built from the file
 *   name without its extension; `mediatype` is set only when the mime lookup
 *   on `path_` succeeds.
 */
const parsePath = (path_, basePath = null, format = null) => {
  const isItUrl = isUrl(path_) || isUrl(basePath);
  let fileName;

  if (isItUrl) {
    const urlParts = _url.default.parse(path_);

    // `pathname` is null for inputs such as '?a=b'; fall back to an empty name.
    fileName = (urlParts.pathname || '').replace(/^.*[\\\/]/, '');

    if (!format && urlParts.query && urlParts.query.includes('format=csv')) {
      format = 'csv';
    }
  } else {
    fileName = path_.replace(/^.*[\\\/]/, '');
  }

  const extension = _path.default.extname(fileName);

  // Strip the extension from the END of the name. A plain
  // `replace(extension, '')` would remove the first occurrence instead,
  // turning 'a.csv.b.csv' into 'a.b.csv'.
  const baseName = extension ? fileName.slice(0, -extension.length) : fileName;

  const name = baseName
    .toLowerCase()
    .trim()
    .replace(/&/g, '-and-')
    // Collapse every run of characters outside [a-z0-9._-] into a single dash.
    .replace(/[^a-z0-9._-]+/g, '-');

  const descriptor = {
    path: path_,
    pathType: isItUrl ? 'remote' : 'local',
    name,
    format: format ? format : extension.slice(1).toLowerCase()
  };

  const mediatype = _mimeTypes.default.lookup(path_);

  if (mediatype) {
    descriptor.mediatype = mediatype;
  }

  return descriptor;
};
122+
123+ exports . parsePath = parsePath ;
124+
/**
 * Parse a dataset identifier (local path or URL) into its components.
 *
 * For github.com URLs the host is rewritten to raw.githubusercontent.com and
 * the branch becomes `version` ('master' when the URL has only owner/repo).
 * For datahub.io URLs the host is rewritten to pkgstore.datahub.io, the owner
 * is resolved through the DataHub resolver API unless it is 'core', and the
 * latest successful revision id becomes `version`. Both cases need network
 * access for datahub.io.
 *
 * @param {?string} path_ - Local path or URL; a trailing `datapackage.json`
 *   and trailing slash are ignored.
 * @returns {Promise<{name: string, owner: ?string, path: string, type: string, original: *, version: (string|number)}>}
 *   `type` is '' for empty input, otherwise 'local', 'url', 'github' or 'datahub'.
 * @throws {Error} When the DataHub source API answers with status >= 400.
 */
const parseDatasetIdentifier = async path_ => {
  const out = {
    name: '',
    owner: null,
    path: '',
    type: '',
    original: path_,
    version: ''
  };

  // `== null` covers both null and undefined, so a missing argument returns
  // the empty result instead of crashing on `.replace` below.
  if (path_ == null || path_ === '') return out;

  out.type = isUrl(path_) ? 'url' : 'local';

  const normalizedPath = path_
    .replace(/\/?datapackage\.json/, '')
    .replace(/\/$/, '');

  if (out.type === 'local') {
    out.path = process.platform === 'win32'
      ? _path.default.resolve(normalizedPath)
      : _path.default.posix.resolve(normalizedPath);
    out.name = _path.default.basename(out.path);
    return out;
  }

  const urlparts = _url.default.parse(normalizedPath);
  // For 'https://host/owner/name' this is ['', 'owner', 'name'].
  const parts = urlparts.pathname.split('/');
  let name = parts[parts.length - 1];
  let owner = null;

  if (urlparts.host === 'github.com') {
    out.type = 'github';
    urlparts.host = 'raw.githubusercontent.com';
    owner = parts[1];
    const repoName = parts[2];
    let branch = 'master';

    if (parts.length < 6) {
      name = repoName;
    }

    if (parts.length === 3) {
      // Bare owner/repo URL: append the default branch.
      parts.push(branch);
    } else {
      // owner/repo/<tree|blob>/<branch>/...: keep the branch and drop the
      // segment at index 3, which has no counterpart on the raw host.
      branch = parts[4];
      parts.splice(3, 1);
    }

    urlparts.pathname = parts.join('/');
    out.version = branch;
  } else if (urlparts.host === 'datahub.io') {
    out.type = 'datahub';
    urlparts.host = 'pkgstore.datahub.io';
    owner = parts[1];
    name = parts[2];

    if (owner !== 'core') {
      // Replace the owner segment with the user id returned by the resolver.
      const resolved = await (0, _nodeFetch.default)(`https://api.datahub.io/resolver/resolve?path=${owner}/${name}`);
      const resolvedBody = await resolved.json();
      parts[1] = resolvedBody.userid;
    }

    const res = await (0, _nodeFetch.default)(`https://api.datahub.io/source/${parts[1]}/${name}/successful`);

    if (res.status >= 400) {
      throw new Error('Provided URL is invalid. Expected URL to a dataset or descriptor.');
    }

    const source = await res.json();
    // The revision number is the last '/'-separated segment of `id`.
    const revisionId = parseInt(source.id.split('/').pop(), 10);
    parts.push(revisionId);
    urlparts.pathname = parts.join('/');
    out.version = revisionId;
  }

  out.name = name;
  out.owner = owner;
  out.path = _url.default.format(urlparts) + '/';

  return out;
};
206+
207+ exports . parseDatasetIdentifier = parseDatasetIdentifier ;
208+
/**
 * Tell whether a value looks like a URL: an optional alphabetic scheme
 * followed by `//` at the very start (case-insensitive).
 *
 * @param {*} path_ - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} True when the pattern matches.
 */
const isUrl = path_ => new RegExp('^(?:[a-z]+:)?//', 'i').test(path_);
213+
214+ exports . isUrl = isUrl ;
215+
/**
 * Decide whether a path refers to a dataset rather than a single file.
 *
 * - Anything ending in `datapackage.json` is a dataset.
 * - Any other URL is not.
 * - A local path is a dataset unless it resolves to a regular file.
 *
 * @param {string} path_ - Local path or URL.
 * @returns {boolean} True when the path is considered a dataset.
 * @throws {Error} Propagates the `fs` error when a local path cannot be
 *   stat'ed (e.g. it does not exist).
 */
const isDataset = path_ => {
  if (path_.endsWith('datapackage.json')) {
    return true;
  }

  if (isUrl(path_)) {
    return false;
  }

  // statSync follows symbolic links. lstatSync describes the link itself, so a
  // symlink pointing at a regular file reported isFile() === false and was
  // wrongly classified as a dataset.
  return !_fs.default.statSync(path_).isFile();
};
231+
232+ exports . isDataset = isDataset ;
0 commit comments