Commit 2f8063e9 authored by Clara Stoddart's avatar Clara Stoddart

Add a directory that connects to and populates the database from a scraped Waitrose CSV file

Add a directory that handles connecting to and populating the database by reading a CSV file produced by scraping the Waitrose website.
parent 9cc1a24c
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="masterDetails">
<states>
<state key="ProjectJDKs.UI">
<settings>
<last-edited>11</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
\ No newline at end of file
# Default ignored files
/workspace.xml
\ No newline at end of file
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<DBN-PSQL>
<case-options enabled="true">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false" />
</DBN-PSQL>
<DBN-SQL>
<case-options enabled="true">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false">
<option name="STATEMENT_SPACING" value="one_line" />
<option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
<DBN-PSQL>
<case-options enabled="true">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false" />
</DBN-PSQL>
<DBN-SQL>
<case-options enabled="true">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false">
<option name="STATEMENT_SPACING" value="one_line" />
<option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
</code_scheme>
</component>
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GoogleJavaFormatSettings">
<option name="enabled" value="false" />
</component>
</project>
\ No newline at end of file
<component name="libraryTable">
<library name="postgresql-42.2.14">
<CLASSES>
<root url="jar://C:/postgresql-42.2.14.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/database-creation.iml" filepath="$PROJECT_DIR$/database-creation.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="postgresql-42.2.14" level="project" />
</component>
</module>
\ No newline at end of file
This diff is collapsed.
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * Loads scraped product data from a CSV file into a database table.
 *
 * <p>Connects to the database given by the system environment variable DATABASE_URL, reads the CSV
 * file line by line, and writes one row per product into the table named by TABLE_NAME. The CSV
 * path defaults to DEFAULT_CSV_FILE and may be overridden by the first command-line argument.
 *
 * <p>BEFORE RUNNING THIS PROGRAM:
 * Ensure you have a JDBC driver installed, and have set the JDBC driver .jar file as an external
 * library. Ensure you have a system environment variable called DATABASE_URL, set to the database
 * url you want to connect to, which is given by Heroku.
 */
public class CSVReading {
    private static final String DEFAULT_CSV_FILE =
            "C:\\Clickerance\\chrome-groceries-extension\\database-creation\\src\\25-June-2020-data.csv";
    private static final String TABLE_NAME = "ProductsIngredients";

    /**
     * Splits a CSV line on commas, unless the comma is inside quotation marks. This accounts for
     * commas within the ingredients column. Compiled once because it is applied to every line.
     */
    private static final java.util.regex.Pattern CSV_SEPARATOR =
            java.util.regex.Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");

    /** Lower-case single words that mark a product as containing gluten. */
    private static final Set<String> GLUTEN_TERMS = terms("gluten", "wheat", "rye", "barley", "malt");

    /** Lower-case single words that mark a product as containing corn ("fatty acids" is handled separately). */
    private static final Set<String> CORN_TERMS = terms(
            "corn", "cornstarch", "cornmeal", "cornflake", "starch", "maize", "dextrins",
            "maltodextrins", "dextrose", "fructose", "hydrol", "treacle", "ethanol", "zein", "sorbitol");

    /** Lower-case single words that mark a product as containing meat. */
    private static final Set<String> MEAT_TERMS =
            terms("meat", "pork", "bacon", "gelatin", "beef", "lamb", "gelatine");

    /** Lower-case single words that mark a product as containing dairy. */
    private static final Set<String> DAIRY_TERMS = terms(
            "milk", "butter", "cream", "lactose", "yogurt", "whey", "cheese", "custard", "nougat",
            "lactulose");

    enum Ingredient {
        GLUTEN,
        MEAT,
        CORN,
        DAIRY,
    }

    /**
     * Opens the CSV file, then recreates the table TABLE_NAME and fills it from the file.
     *
     * <p>The file is opened before the table is dropped so that a missing input file does not wipe
     * the existing data. If there does not exist a table called TABLE_NAME, this is not an issue,
     * as deleteTable only deletes the table if it exists.
     *
     * @param args optional; args[0], when present, is used as the CSV path instead of the default
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CSV_FILE;
        // try-with-resources guarantees the reader is closed on every exit path.
        try (BufferedReader csvReader = new BufferedReader(new FileReader(path))) {
            DatabaseConnection connection = new DatabaseConnection(TABLE_NAME);
            if (!connection.deleteTable() || !connection.createTable()) {
                System.err.println("Could not prepare table " + TABLE_NAME + "; nothing was imported");
                return;
            }
            readCSV(csvReader, connection);
        } catch (FileNotFoundException e) {
            System.err.println("CSV file not found");
        } catch (IOException e) {
            System.err.println("Error closing CSV file");
        }
    }

    /**
     * Reads the header to locate the "Name", "Ingredient List" and "Allergy Advice" columns, then
     * writes one database row per remaining line.
     *
     * <p>Rows that have too few columns are reported on stderr and skipped instead of aborting the
     * whole import. The reader is not closed here; that is the caller's responsibility.
     *
     * @param csvReader  reader positioned at the header line of the CSV data
     * @param connection connection used to insert the products
     */
    public static void readCSV(BufferedReader csvReader, DatabaseConnection connection) {
        try {
            String header = csvReader.readLine();
            if (header == null) {
                System.err.println("CSV file is empty");
                return;
            }

            int nameIndex = -1;
            int ingredientsIndex = -1;
            int allergensIndex = -1;
            String[] csvColumns = CSV_SEPARATOR.split(header, -1);
            for (int i = 0; i < csvColumns.length; i++) {
                if ("Name".equals(csvColumns[i])) {
                    nameIndex = i;
                } else if ("Ingredient List".equals(csvColumns[i])) {
                    ingredientsIndex = i;
                } else if ("Allergy Advice".equals(csvColumns[i])) {
                    allergensIndex = i;
                }
            }
            if (nameIndex < 0 || ingredientsIndex < 0 || allergensIndex < 0) {
                System.err.println(
                        "CSV header must contain the columns Name, Ingredient List and Allergy Advice");
                return;
            }
            int requiredColumns = Math.max(nameIndex, Math.max(ingredientsIndex, allergensIndex)) + 1;

            String line;
            int lineNumber = 1;
            while ((line = csvReader.readLine()) != null) {
                lineNumber++;
                // Limit -1 keeps trailing empty fields, so an empty last column is still a column.
                String[] row = CSV_SEPARATOR.split(line, -1);
                if (row.length < requiredColumns) {
                    System.err.println("Skipping malformed CSV line " + lineNumber);
                    continue;
                }
                // NOTE(review): fields keep any surrounding quotation marks from the CSV, and
                // quoted fields that span several lines are not supported by this line-based reader.
                Map<Ingredient, Boolean> contains = parseIngredients(row[ingredientsIndex]);
                Map<Ingredient, Boolean> mayContain = parseAllergens(row[allergensIndex]);
                writeProduct(row[nameIndex], contains, mayContain, connection);
            }
        } catch (IOException e) {
            System.err.println("Error reading CSV file");
        }
    }

    /**
     * Scans an ingredient list for words that indicate gluten, corn, meat or dairy.
     *
     * @param ingredientList list of ingredients of a given product; null is treated as empty
     * @return a map with an entry for every Ingredient, specifying if it is or is not contained
     *     in the product
     */
    static Map<Ingredient, Boolean> parseIngredients(String ingredientList) {
        Map<Ingredient, Boolean> contains = allFalse();
        if (ingredientList == null) {
            return contains;
        }

        // Split into words on non-word characters, dropping the empty strings the split produces.
        // Locale.ROOT keeps the lower-casing independent of the machine's default locale.
        List<String> words = new ArrayList<>();
        for (String token : ingredientList.split("\\W+")) {
            if (!token.isEmpty()) {
                words.add(token.toLowerCase(Locale.ROOT));
            }
        }

        for (int i = 0; i < words.size(); i++) {
            String word = words.get(i);
            // "fatty acids" is the only two-word term, so it needs a look-ahead at the next word.
            boolean fattyAcids = "fatty".equals(word)
                    && i + 1 < words.size()
                    && "acids".equals(words.get(i + 1));
            if (GLUTEN_TERMS.contains(word)) {
                contains.put(Ingredient.GLUTEN, true);
            } else if (CORN_TERMS.contains(word) || fattyAcids) {
                contains.put(Ingredient.CORN, true);
            } else if (MEAT_TERMS.contains(word)) {
                contains.put(Ingredient.MEAT, true);
            } else if (DAIRY_TERMS.contains(word)) {
                contains.put(Ingredient.DAIRY, true);
            }
        }
        return contains;
    }

    /**
     * Returns a Map specifying if an ingredient may be contained in the product. We do not consider
     * "may contain" allergens for the purpose of the MVP, hence this is incomplete: every
     * ingredient is currently reported as false regardless of the input.
     *
     * @param allergensList contents of the "Allergy Advice" column; currently not inspected
     * @return a map with an entry for every Ingredient, all set to false
     */
    static Map<Ingredient, Boolean> parseAllergens(String allergensList) {
        return allFalse();
    }

    /**
     * Inserts one product row into TABLE_NAME and reports a failed insert on stderr.
     *
     * @param prodName   product name as read from the CSV
     * @param contains   result of parseIngredients for this product
     * @param mayContain result of parseAllergens; unused, as we do not consider "may contain"
     *                   allergens for the purpose of the MVP
     * @param connection connection used to run the INSERT
     */
    static void writeProduct(String prodName, Map<Ingredient, Boolean> contains, Map<Ingredient, Boolean> mayContain, DatabaseConnection connection) {
        // DatabaseConnection.execute only accepts SQL text, so the name cannot be bound as a
        // parameter. Doubling single quotes keeps names such as "Cook's Ingredients" from
        // terminating the string literal early (broken statement / SQL injection).
        String escapedName = prodName.replace("'", "''");
        String sql = "INSERT INTO " + TABLE_NAME
                + " (product, containsGluten, containsDairy, containsCorn, containsMeat) "
                + "VALUES ('" + escapedName + "', "
                + sqlBoolean(contains, Ingredient.GLUTEN) + ", "
                + sqlBoolean(contains, Ingredient.DAIRY) + ", "
                + sqlBoolean(contains, Ingredient.CORN) + ", "
                + sqlBoolean(contains, Ingredient.MEAT) + ");";
        if (!connection.execute(sql)) {
            System.err.println("Failed to insert product: " + prodName);
        }
    }

    /** Builds an immutable lookup set from the given words. */
    private static Set<String> terms(String... words) {
        return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(words)));
    }

    /** Creates a map holding false for every Ingredient. */
    private static Map<Ingredient, Boolean> allFalse() {
        Map<Ingredient, Boolean> flags = new EnumMap<>(Ingredient.class);
        for (Ingredient ingredient : Ingredient.values()) {
            flags.put(ingredient, false);
        }
        return flags;
    }

    /** Renders a flag as the SQL literal TRUE or FALSE; a missing entry counts as FALSE. */
    private static String sqlBoolean(Map<Ingredient, Boolean> flags, Ingredient ingredient) {
        return Boolean.TRUE.equals(flags.get(ingredient)) ? "TRUE" : "FALSE";
    }
}
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Wraps one JDBC connection to the PostgreSQL database named by the DATABASE_URL environment
 * variable, and runs statements against a single table.
 *
 * <p>Construction never throws: if the connection cannot be opened, the problem is reported on
 * stderr and every later operation returns false. Call {@link #close()} (or use
 * try-with-resources) when finished to release the connection.
 */
public class DatabaseConnection implements AutoCloseable {
    private final String tableName;
    // Both are null when the connection could not be established.
    private final Connection connection;
    private final Statement stmt;

    /**
     * Opens the connection and prepares a statement for later use.
     *
     * @param tableName name of the table that createTable and deleteTable operate on; it is
     *                  concatenated into SQL as an identifier, so it must come from trusted code
     */
    public DatabaseConnection(String tableName) {
        this.tableName = tableName;
        Connection openedConnection = null;
        Statement openedStatement = null;
        try {
            openedConnection = getConnection();
            openedStatement = openedConnection.createStatement();
        } catch (SQLException e) {
            System.err.println("Error connecting to database");
            System.err.println(e.getMessage());
            // The connection may have opened before createStatement failed; do not leak it.
            closeQuietly(openedConnection);
            openedConnection = null;
        } catch (URISyntaxException e) {
            System.err.println("Error connecting to database");
            // Only the reason is printed: the full message echoes the URL, password included.
            System.err.println("DATABASE_URL is not a valid URI: " + e.getReason());
        }
        this.connection = openedConnection;
        this.stmt = openedStatement;
    }

    /**
     * Creates the table with a product name column and one boolean column per ingredient.
     *
     * @return true if the table was created, false if there is no connection or the statement failed
     */
    public boolean createTable() {
        return run("CREATE TABLE " + tableName + " (product varchar(255), containsGluten BOOLEAN, containsDairy BOOLEAN, containsCorn BOOLEAN, containsMeat BOOLEAN)",
                "Error creating table");
    }

    /**
     * Drops the table if it exists; a table that does not exist is not an error.
     *
     * @return true if the statement succeeded, false if there is no connection or it failed
     */
    public boolean deleteTable() {
        return run("DROP TABLE IF EXISTS " + tableName + ";", "Error deleting table");
    }

    /**
     * Executes an arbitrary SQL statement.
     *
     * @param sqlStatement complete SQL text; the caller is responsible for escaping any values in it
     * @return true if the statement succeeded, false if there is no connection or it failed
     */
    boolean execute(String sqlStatement) {
        return run(sqlStatement, "Error executing statement");
    }

    /** Releases the statement and the connection; safe to call when the connection never opened. */
    @Override
    public void close() {
        closeQuietly(stmt);
        closeQuietly(connection);
    }

    /**
     * Builds the JDBC URL for a Heroku-style postgres URI, keeping the port when one is given.
     *
     * @param dbUri URI of the form postgres://user:password@host[:port]/database
     * @return the matching jdbc:postgresql:// URL, without credentials
     */
    static String toJdbcUrl(URI dbUri) {
        StringBuilder url = new StringBuilder("jdbc:postgresql://").append(dbUri.getHost());
        if (dbUri.getPort() != -1) {
            url.append(':').append(dbUri.getPort());
        }
        return url.append(dbUri.getPath()).toString();
    }

    /** Runs one statement, reporting failure on stderr under the given heading. */
    private boolean run(String sql, String failureMessage) {
        if (stmt == null) {
            System.err.println(failureMessage + ": no database connection");
            return false;
        }
        try {
            stmt.execute(sql);
            return true;
        } catch (SQLException e) {
            System.err.println(failureMessage);
            System.err.println(e.getMessage());
            return false;
        }
    }

    /** Closes a resource if present; a failure to close is reported but never propagated. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            System.err.println("Error closing database resource");
        }
    }

    /** Reads DATABASE_URL, extracts the credentials and opens the connection. */
    private static java.sql.Connection getConnection() throws URISyntaxException, SQLException {
        String databaseUrl = System.getenv("DATABASE_URL");
        if (databaseUrl == null || databaseUrl.isEmpty()) {
            throw new SQLException("The DATABASE_URL environment variable is not set");
        }
        URI dbUri = new URI(databaseUrl);
        String userInfo = dbUri.getUserInfo();
        if (userInfo == null || dbUri.getHost() == null) {
            throw new SQLException(
                    "DATABASE_URL must have the form postgres://user:password@host[:port]/database");
        }
        // Limit 2 keeps any ':' inside the password; a missing password becomes the empty string.
        String[] credentials = userInfo.split(":", 2);
        String username = credentials[0];
        String password = credentials.length > 1 ? credentials[1] : "";
        return DriverManager.getConnection(toJdbcUrl(dbUri), username, password);
    }
}
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment