一:使用 SQL 加载外部文件(HTTP)
-- Replace the whole "wikipedia" datasource with rows read from a gzipped JSON
-- file fetched over HTTP.
-- Every identifier is double-quoted: "user" and "timestamp" are reserved
-- keywords in Druid SQL (Calcite) and are only read as column names when
-- quoted; the remaining columns are quoted for consistency.
REPLACE INTO "wikipedia" OVERWRITE ALL
WITH "ext" AS (
  -- SELECT * is deliberate: the column set is fixed by the row signature
  -- passed as the third EXTERN argument.
  SELECT *
  FROM TABLE(
    EXTERN(
      -- 1st argument: input source
      '{"type":"http","uris":["https://druid.apache.org/data/wikipedia.json.gz"]}',
      -- 2nd argument: input format
      '{"type":"json"}',
      -- 3rd argument: row signature (column names and types)
      '[{"name":"isRobot","type":"string"},{"name":"channel","type":"string"},{"name":"timestamp","type":"string"},{"name":"flags","type":"string"},{"name":"isUnpatrolled","type":"string"},{"name":"page","type":"string"},{"name":"diffUrl","type":"string"},{"name":"added","type":"long"},{"name":"comment","type":"string"},{"name":"commentLength","type":"long"},{"name":"isNew","type":"string"},{"name":"isMinor","type":"string"},{"name":"delta","type":"long"},{"name":"isAnonymous","type":"string"},{"name":"user","type":"string"},{"name":"deltaBucket","type":"long"},{"name":"deleted","type":"long"},{"name":"namespace","type":"string"},{"name":"cityName","type":"string"},{"name":"countryName","type":"string"},{"name":"regionIsoCode","type":"string"},{"name":"metroCode","type":"long"},{"name":"countryIsoCode","type":"string"},{"name":"regionName","type":"string"}]'
    )
  )
)
SELECT
  -- The source "timestamp" column is declared as a string in the row
  -- signature; TIME_PARSE converts it into the __time column.
  TIME_PARSE("timestamp") AS "__time",
  "isRobot",
  "channel",
  "flags",
  "isUnpatrolled",
  "page",
  "diffUrl",
  "added",
  "comment",
  "commentLength",
  "isNew",
  "isMinor",
  "delta",
  "isAnonymous",
  "user",
  "deltaBucket",
  "deleted",
  "namespace",
  "cityName",
  "countryName",
  "regionIsoCode",
  "metroCode",
  "countryIsoCode",
  "regionName"
FROM "ext"
PARTITIONED BY DAY
二:从 Kafka 摄入数据
{
  "type": "kafka",
  "spec": {
    "dataSchema": {
      "dataSource": "kttm-kafka-supervisor-console",
      "timestampSpec": {
        "column": "timestamp",
        "format": "iso"
      },
      "dimensionsSpec": {
        "dimensions": [
          "session",
          "number",
          "client_ip",
          "language",
          "adblock_list",
          "app_version",
          "path",
          "loaded_image",
          "referrer",
          "referrer_host",
          "server_ip",
          "screen",
          "window",
          {
            "type": "long",
            "name": "session_length"
          },
          "timezone",
          "timezone_offset",
          {
            "type": "json",
            "name": "event"
          },
          {
            "type": "json",
            "name": "agent"
          },
          {
            "type": "json",
            "name": "geo_ip"
          }
        ]
      },
      "granularitySpec": {
        "queryGranularity": "none",
        "rollup": false,
        "segmentGranularity": "day"
      }
    },
    "ioConfig": {
      "type": "kafka",
      "consumerProperties": {
        "bootstrap.servers": "localhost:9092"
      },
      "topic": "kttm",
      "inputFormat": {
        "type": "json"
      },
      "useEarliestOffset": true
    },
    "tuningConfig": {
      "type": "kafka"
    }
  }
}
三:从 HDFS 摄入数据