retriever.engines package

Submodules

retriever.engines.csvengine module

class retriever.engines.csvengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to a CSV file.

abbreviation = 'csv'
auto_column_number = 0
create_db()

Override create_db since there is no database just a CSV file

create_table()

Create the table by creating an empty csv file

datatypes = {'auto': 'INTEGER', 'bigint': 'INTEGER', 'bool': 'INTEGER', 'char': 'TEXT', 'decimal': 'REAL', 'double': 'REAL', 'int': 'INTEGER'}
disconnect()

Close the last file in the dataset

disconnect_files()

Close each file after being written

execute(statement, commit=True)

Write a line to the output file

executemany(statement, values, commit=True)

Write a line to the output file

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

insert_limit = 1000
insert_statement(values)

Returns a comma delimited row of values

name = 'CSV'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.csv')]
table_exists(dbname, tablename)

Check to see if the data file currently exists

table_names = []
to_csv(sort=True)

Export sorted version of CSV file

retriever.engines.download_only module

retriever.engines.download_only.dummy_method(self, *args, **kwargs)
class retriever.engines.download_only.engine

Bases: retriever.lib.engine.Engine

Engine instance for downloading data files without loading them into a database.

abbreviation = 'download'
add_to_table(*args, **kwargs)
auto_create_table(table, url=None, filename=None, pk=None)

Download the file if it doesn’t exist

auto_get_datatypes(*args, **kwargs)
auto_get_delimiter(*args, **kwargs)
connect(*args, **kwargs)
convert_data_type(*args, **kwargs)
create_db(*args, **kwargs)
create_db_statement(*args, **kwargs)
create_table(*args, **kwargs)
create_table_statement(*args, **kwargs)
database_name(*args, **kwargs)
disconnect(*args, **kwargs)
drop_statement(*args, **kwargs)
execute(*args, **kwargs)
executemany(*args, **kwargs)
extract_fixed_width(*args, **kwargs)
extract_gz(*args, **kwargs)
extract_tar(*args, **kwargs)
extract_zip(*args, **kwargs)
fetch_tables(*args, **kwargs)
final_cleanup()

Copies downloaded files to desired directory

Copies the downloaded files into the chosen directory unless files with the same name already exist in the directory.

find_file(filename)

Checks for the given file and adds it to the list of all files

format_insert_value(*args, **kwargs)
get_connection()

Gets the db connection.

get_ct_data(*args, **kwargs)
get_ct_line_length(*args, **kwargs)
get_cursor(*args, **kwargs)
get_input(*args, **kwargs)
insert_data_from_archive(*args, **kwargs)
insert_data_from_file(*args, **kwargs)
insert_data_from_url(url)

Insert data from a web resource

insert_raster(*args, **kwargs)
insert_statement(*args, **kwargs)
insert_vector(*args, **kwargs)
load_data(*args, **kwargs)
name = 'Download Only'
next(*args, **kwargs)
register_files(filenames)

Identify a list of files to be moved by the download

When downloading archives with multiple files the engine needs to be informed of all of the file names so that it can move them.

required_opts = [('path', 'File path to copy data files', './'), ('subdir', 'Keep the subdirectories for archived files', False)]
set_engine_encoding(*args, **kwargs)
set_table_delimiter(*args, **kwargs)
supported_raster(*args, **kwargs)
table_exists(dbname, tablename)

Checks if the file to be downloaded already exists

table_name(*args, **kwargs)
to_csv(*args, **kwargs)
warning(*args, **kwargs)

retriever.engines.jsonengine module

Engine for writing data to a JSON file

class retriever.engines.jsonengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to a JSON file.

abbreviation = 'json'
auto_column_number = 0
create_db()

Override create_db since there is no database just a JSON file

create_table()

Create the table by creating an empty json file

datatypes = {'auto': 'INTEGER', 'bigint': 'INTEGER', 'bool': 'INTEGER', 'char': 'TEXT', 'decimal': 'REAL', 'double': 'REAL', 'int': 'INTEGER'}
disconnect()

Close out the JSON with a `\n]}` and close the file.

Close all the file objects that have been created. Re-write the files, stripping off the last comma, and then close with a `\n]}`.

execute(statement, commit=True)

Write a line to the output file

executemany(statement, values, commit=True)

Write a line to the output file

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

insert_limit = 1000
insert_statement(values)

Return SQL statement to insert a set of values.

name = 'JSON'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.json')]
table_exists(dbname, tablename)

Check to see if the data file currently exists

table_names = []
to_csv(sort=True, path=None)

Export table from json engine to CSV file

retriever.engines.msaccess module

class retriever.engines.msaccess.engine

Bases: retriever.lib.engine.Engine

Engine instance for Microsoft Access.

abbreviation = 'msaccess'
convert_data_type(datatype)

MS Access can’t handle complex Decimal types

create_db()

MS Access doesn’t create databases.

datatypes = {'auto': 'AUTOINCREMENT', 'bigint': 'INTEGER', 'bool': 'BIT', 'char': 'VARCHAR', 'decimal': 'NUMERIC', 'double': 'NUMERIC', 'int': 'INTEGER'}
drop_statement(object_type, object_name)

Returns a drop table or database SQL statement.

get_connection()

Gets the db connection.

insert_data_from_file(filename)

Perform a bulk insert.

insert_limit = 1000
instructions = 'Create a database in Microsoft Access, close Access,then \nselect your database file using this dialog.'
name = 'Microsoft Access'
placeholder = '?'
required_opts = [('file', 'Enter the filename of your Access database', './access.mdb', 'Access databases (*.mdb, *.accdb)|*.mdb;*.accdb'), ('table_name', 'Format of table name', '[{db} {table}]')]

retriever.engines.mysql module

class retriever.engines.mysql.engine

Bases: retriever.lib.engine.Engine

Engine instance for MySQL.

abbreviation = 'mysql'
create_db_statement()

Return SQL statement to create a database.

datatypes = {'auto': 'INT(5) NOT NULL AUTO_INCREMENT', 'bigint': 'BIGINT', 'bool': 'BOOL', 'char': ('TEXT', 'VARCHAR'), 'decimal': 'DECIMAL', 'double': 'DOUBLE', 'int': 'INT'}
get_connection()

Get db connection.

PyMySQL has changed the default encoding from latin1 to utf8mb4. https://github.com/PyMySQL/PyMySQL/pull/692/files For PyMySQL to work well on CI infrastructure, connect with the preferred charset

insert_data_from_file(filename)

Call MySQL “LOAD DATA LOCAL INFILE” statement to perform a bulk insert.

insert_limit = 1000
lookup_encoding()

Convert well known encoding to MySQL syntax

MySQL has a unique way of representing the encoding. For example, latin-1 becomes latin1 in MySQL. Please update the encoding lookup table if the required encoding is not present.

max_int = 4294967295
name = 'MySQL'
placeholder = '%s'
required_opts = [('user', 'Enter your MySQL username', 'root'), ('password', 'Enter your password', ''), ('host', 'Enter your MySQL host', 'localhost'), ('port', 'Enter your MySQL port', 3306), ('database_name', 'Format of database name', '{db}'), ('table_name', 'Format of table name', '{db}.{table}')]
set_engine_encoding()

Set MySQL database encoding to match data encoding

table_exists(dbname, tablename)

Check to see if the given table exists.

retriever.engines.postgres module

class retriever.engines.postgres.engine

Bases: retriever.lib.engine.Engine

Engine instance for PostgreSQL.

abbreviation = 'postgres'
auto_create_table(table, url=None, filename=None, pk=None)

Create a table automatically.

Overwrites the main Engine class. Identifies the type of table to create. For a Raster or vector (Gis) dataset, create the table from the contents downloaded from the url or from the contents in the filename. Otherwise, use the Engine function for a tabular table.

create_db()

Create Engine database.

create_db_statement()

In PostgreSQL, the equivalent of a SQL database is a schema.

create_table()

Create a table and commit.

PostgreSQL needs to commit operations individually. Enable PostGis extensions if a script has a non tabular table.

datatypes = {'auto': 'serial', 'bigint': 'bigint', 'bool': 'boolean', 'char': 'varchar', 'decimal': 'decimal', 'double': 'double precision', 'int': 'integer'}
drop_statement(objecttype, objectname)

In PostgreSQL, the equivalent of a SQL database is a schema.

format_insert_value(value, datatype)

Format value for an insert statement.

get_connection()

Gets the db connection.

Please update the encoding lookup table if the required encoding is not present.

insert_data_from_file(filename)

Use PostgreSQL’s “COPY FROM” statement to perform a bulk insert.

insert_limit = 1000
insert_raster(path=None, srid=4326)

Import Raster into Postgis Table Uses raster2pgsql -I -C -s <SRID> <PATH> <SCHEMA>.<DBTABLE> | psql -d <DATABASE> The sql processed by raster2pgsql is run as psql -U postgres -d <gisdb> -f <elev>.sql

insert_statement(values)

Return SQL statement to insert a set of values.

insert_vector(path=None, srid=4326)

Import Vector into Postgis Table

-- Enable PostGIS (includes raster): CREATE EXTENSION postgis;

-- Enable Topology: CREATE EXTENSION postgis_topology;

-- Fuzzy matching needed for Tiger: CREATE EXTENSION fuzzystrmatch;

-- Enable US Tiger Geocoder: CREATE EXTENSION postgis_tiger_geocoder; Uses shp2pgsql -I -s <SRID> <PATH/TO/SHAPEFILE> <SCHEMA>.<DBTABLE> | psql -U postgres -d <DBNAME>

The sql processed by shp2pgsql is run as psql -U postgres -d <DBNAME>

max_int = 2147483647
name = 'PostgreSQL'
placeholder = '%s'
required_opts = [('user', 'Enter your PostgreSQL username', 'postgres'), ('password', 'Enter your password', ''), ('host', 'Enter your PostgreSQL host', 'localhost'), ('port', 'Enter your PostgreSQL port', 5432), ('database', 'Enter your PostgreSQL database name', 'postgres'), ('database_name', 'Format of schema name', '{db}'), ('table_name', 'Format of table name', '{db}.{table}')]
spatial_support = True
supported_raster(path, ext=None)

Return the supported Gis raster files from the path

Update the extensions after testing if a given raster type is supported by raster2pgsql.

retriever.engines.sqlite module

class retriever.engines.sqlite.engine

Bases: retriever.lib.engine.Engine

Engine instance for SQLite.

abbreviation = 'sqlite'
create_db()

Don’t create database for SQLite

SQLite doesn’t create databases. Each database is a file and needs a separate connection. This overloads `create_db` to do nothing in this case.

datatypes = {'auto': ('INTEGER', 'AUTOINCREMENT'), 'bigint': 'INTEGER', 'bool': 'INTEGER', 'char': 'TEXT', 'decimal': 'REAL', 'double': 'REAL', 'int': 'INTEGER'}
fetch_tables(dataset, table_names)

Return sqlite dataset as list of pandas dataframe.

get_bulk_insert_statement()

Get insert statement for bulk inserts

This places ?’s instead of the actual values so that executemany() can operate as designed

get_connection()

Get db connection.

insert_data_from_file(filename)

Perform a high speed bulk insert

Checks to see if a given file can be bulk inserted, and if so loads it in chunks and inserts those chunks into the database using executemany.

insert_limit = 1000
name = 'SQLite'
placeholder = '?'
required_opts = [('file', 'Enter the filename of your SQLite database', './sqlite.db', ''), ('table_name', 'Format of table name', '{db}_{table}')]

retriever.engines.xmlengine module

class retriever.engines.xmlengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to a XML file.

abbreviation = 'xml'
auto_column_number = 0
create_db()

Override create_db since there is no database just an XML file.

create_table()

Create the table by creating an empty XML file.

datatypes = {'auto': 'INTEGER', 'bigint': 'INTEGER', 'bool': 'INTEGER', 'char': 'TEXT', 'decimal': 'REAL', 'double': 'REAL', 'int': 'INTEGER'}
disconnect()

Close out the xml files

Close all the file objects that have been created. Re-write the files, stripping off the last comma, and then close with a closing tag.

execute(statement, commit=True)

Write a line to the output file.

executemany(statement, values, commit=True)

Write a line to the output file.

format_insert_value(value, datatype)

Format value for an insert statement.

get_connection()

Get db connection.

insert_limit = 1000
insert_statement(values)

Create the insert statement.

Wrap each data value with its column name (key) using _format_single_row: <key> value </key>.

name = 'XML'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.xml')]
table_names = []
to_csv(sort=True, path=None)

Export table from xml engine to CSV file.