From 08944ff5a7d7b55ecb36fb870a46f0d1f909f46f Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 12 Dec 2013 14:24:15 +0100 Subject: [PATCH 01/10] added xsanchez changes --- .../dbgen/serializer/EmptySerializer.java | 77 ++++++++++++ .../dbgen/serializer/Statistics.java | 116 ++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/EmptySerializer.java create mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/EmptySerializer.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/EmptySerializer.java new file mode 100644 index 000000000..5e2ef6698 --- /dev/null +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/EmptySerializer.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2013 LDBC + * Linked Data Benchmark Council (http://ldbc.eu) + * + * This file is part of ldbc_socialnet_dbgen. + * + * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ldbc_socialnet_dbgen. If not, see . + * + * Copyright (C) 2011 OpenLink Software + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; only Version 2 of the License dated + * June 1991. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package ldbc.socialnet.dbgen.serializer; + +import ldbc.socialnet.dbgen.objects.Comment; +import ldbc.socialnet.dbgen.objects.Group; +import ldbc.socialnet.dbgen.objects.Photo; +import ldbc.socialnet.dbgen.objects.Post; +import ldbc.socialnet.dbgen.objects.ReducedUserProfile; +import ldbc.socialnet.dbgen.objects.UserExtraInfo; + +/** + * The empty serializer does nothing. + * + * Its purpose is to avoid the serializing I/O costs in debug phases. + */ +public class EmptySerializer implements Serializer { + + public EmptySerializer() { + } + + public Long unitsGenerated() { + return 0L; + } + + public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ + } + + public void gatherData(Post post){ + } + + public void gatherData(Comment comment){ + } + + public void gatherData(Photo photo){ + } + + public void gatherData(Group group) { + } + + public void close() { + } +} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java new file mode 100644 index 000000000..46c39edb5 --- /dev/null +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java @@ -0,0 +1,116 @@ +package ldbc.socialnet.dbgen.serializer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; + +import ldbc.socialnet.dbgen.dictionary.LocationDictionary; + +import com.google.gson.ExclusionStrategy; +import 
com.google.gson.FieldAttributes; + +/** + * Container class used to store all the generator statistics. + * + */ +public class Statistics { + + /** + * This was used in early stages to exclude a field of a class in the gson parser library. + * It is kept for the sake of having a quick example/place to write future field exclusions. + */ + public class StatisticsExclude implements ExclusionStrategy { + + public boolean shouldSkipClass(Class arg0) { + return false; + } + + public boolean shouldSkipField(FieldAttributes f) { + + return (f.getDeclaringClass() == CountryPair.class && f.getName().equals("population")); + } + + } + + /** + * Container to keep paired countries and their population. It is used to sort + * these pairs by population. + */ + private class CountryPair implements Comparable { + public String[] countries = new String[2]; + public Long population; + + + public int compareTo(CountryPair pair) { + return pair.population.compareTo(population); + } + } + + public Integer minPersonId; + public Integer maxPersonId; + public String minWorkFrom; + public String maxWorkFrom; + public String minPostCreationDate; + public String maxPostCreationDate; + public HashSet firstNames; + public HashSet tagNames; + public HashSet countries; + public HashSet tagClasses; + private ArrayList countryPairs; + + public Statistics() { + minPersonId = Integer.MAX_VALUE; + maxPersonId = Integer.MIN_VALUE; + firstNames = new HashSet(); + tagNames = new HashSet(); + tagClasses = new HashSet(); + countries = new HashSet(); + countryPairs = new ArrayList(); + } + + /** + * Makes all the country pairs of the countries found in the countries statistic field belonging + * to the same continent. These pairs are sorted by population. + * + * @param dicLocation The location dictionary. 
+ */ + public void makeCountryPairs(LocationDictionary dicLocation) { + HashMap> closeCountries = new HashMap>(); + for (String s : countries) { + Integer id = dicLocation.getCountryId(s); + Integer continent = dicLocation.belongsTo(id); + if (!closeCountries.containsKey(continent)) { + closeCountries.put(continent, new ArrayList()); + } + closeCountries.get(continent).add(id); + } + + ArrayList toSort = new ArrayList(); + for (ArrayList relatedCountries : closeCountries.values()) { + for (int i = 0; i < relatedCountries.size(); i++) { + for (int j = i+1; j < relatedCountries.size(); j++) { + CountryPair pair = new CountryPair(); + pair.countries[0] = dicLocation.getLocationName(relatedCountries.get(i)); + pair.countries[1] = dicLocation.getLocationName(relatedCountries.get(j)); + pair.population = dicLocation.getPopulation(relatedCountries.get(i)) + + dicLocation.getPopulation(relatedCountries.get(j)); + toSort.add(pair); + } + } + } + Collections.sort(toSort); + for (CountryPair p : toSort) { + countryPairs.add(p.countries); + } + } + + /** + * Get gson exclusion class for the statistics. + * + * @return The exclusion class. 
+ */ + public StatisticsExclude getExclusion() { + return new StatisticsExclude(); + } +} From 0b9414a87fbce30651bab6c08d3a96a3c1ff2263 Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 12 Dec 2013 15:01:56 +0100 Subject: [PATCH 02/10] fixed pom --- ldbc_socialnet_dbgen/pom.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ldbc_socialnet_dbgen/pom.xml b/ldbc_socialnet_dbgen/pom.xml index 7fe9b31a3..1dbb29663 100644 --- a/ldbc_socialnet_dbgen/pom.xml +++ b/ldbc_socialnet_dbgen/pom.xml @@ -62,5 +62,10 @@ ssj 2.5 + + com.google.code.gson + gson + 2.2.4 + - \ No newline at end of file + From 40b34288909088637e243b44eae8f858a0660c54 Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 12 Dec 2013 16:51:42 +0100 Subject: [PATCH 03/10] prova --- run.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 run.sh diff --git a/run.sh b/run.sh new file mode 100755 index 000000000..26d853fd9 --- /dev/null +++ b/run.sh @@ -0,0 +1,14 @@ +export HADOOP_HOME=/home/aprat/Programs/hadoop-1.0.3 #change to your hadoop folder +export LDBC_SOCIALNET_DBGEN_HOME=/home/aprat/Projects/LDBC/generador/fork/ldbc_socialnet_bm/ldbc_socialnet_dbgen #change to your ldbc_socialnet_dbgen folder +export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-amd64 +export NUM_MACHINES=1 + +mkdir $LDBC_SOCIALNET_DBGEN_HOME/outputDir +mvn -f $LDBC_SOCIALNET_DBGEN_HOME/pom.xml clean +mvn -f $LDBC_SOCIALNET_DBGEN_HOME/pom.xml assembly:assembly + +# para borrar ficheros temporales +cp $LDBC_SOCIALNET_DBGEN_HOME/target/ldbc_socialnet_dbgen.jar $LDBC_SOCIALNET_DBGEN_HOME/ +rm $LDBC_SOCIALNET_DBGEN_HOME/target/ldbc_socialnet_dbgen.jar + +$HADOOP_HOME/bin/hadoop jar $LDBC_SOCIALNET_DBGEN_HOME/ldbc_socialnet_dbgen.jar input/sib output/sib $NUM_MACHINES $LDBC_SOCIALNET_DBGEN_HOME/ $LDBC_SOCIALNET_DBGEN_HOME/outputDir/ From 339f9cb7ba319938f45d4656df3233956b505792 Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 12 Dec 2013 16:53:28 +0100 Subject: [PATCH 04/10] Xavi Sanchez changes --- 
ldbc_socialnet_dbgen/params.ini | 2 +- .../dbgen/dictionary/BrowserDictionary.java | 180 ++--- .../dbgen/dictionary/CompanyDictionary.java | 199 +++--- .../dbgen/dictionary/EmailDictionary.java | 131 ++-- .../dbgen/dictionary/IPAddressDictionary.java | 374 +++------- .../dbgen/dictionary/LanguageDictionary.java | 103 ++- .../dbgen/dictionary/LocationDictionary.java | 673 +++++++----------- .../dbgen/dictionary/NamesDictionary.java | 395 +++------- .../dictionary/OrganizationsDictionary.java | 188 ++--- .../dictionary/PopularPlacesDictionary.java | 190 +---- .../dbgen/dictionary/TagDictionary.java | 137 ++-- .../socialnet/dbgen/dictionary/TagMatrix.java | 80 +-- .../dbgen/dictionary/TagTextDictionary.java | 51 +- .../dbgen/dictionary/UserAgentDictionary.java | 79 +- .../dbgen/generator/DateGenerator.java | 110 +-- .../dbgen/generator/GPSGenerator.java | 4 +- .../dbgen/generator/GroupGenerator.java | 26 +- .../socialnet/dbgen/generator/MRWriter.java | 1 - .../dbgen/generator/OutputDataWriter.java | 201 ------ .../dbgen/generator/PhotoGenerator.java | 58 +- .../dbgen/generator/PowerDistGenerator.java | 42 +- .../dbgen/generator/ScalableGenerator.java | 234 +++--- .../ldbc/socialnet/dbgen/objects/Friend.java | 16 +- .../ldbc/socialnet/dbgen/objects/GPS.java | 1 + .../java/ldbc/socialnet/dbgen/objects/IP.java | 64 +- .../ldbc/socialnet/dbgen/objects/Photo.java | 8 +- .../socialnet/dbgen/objects/PhotoStream.java | 200 ------ .../ldbc/socialnet/dbgen/objects/Post.java | 8 +- .../socialnet/dbgen/objects/PostStream.java | 207 ------ .../dbgen/objects/ReducedUserProfile.java | 56 +- .../dbgen/objects/UserExtraInfo.java | 8 +- .../socialnet/dbgen/objects/UserProfile.java | 16 +- .../ldbc/socialnet/dbgen/serializer/CSV.java | 284 +++++--- .../dbgen/serializer/Serializer.java | 56 +- .../socialnet/dbgen/serializer/Turtle.java | 478 ++++++++----- .../dbgen/storage/MFStoreManager.java | 179 ----- .../dbgen/storage/StreamStoreManager.java | 22 - 
.../ldbc/socialnet/dbgen/util/ZOrder.java | 41 +- .../ldbc/socialnet/dbgen/vocabulary/DBP.java | 27 +- .../socialnet/dbgen/vocabulary/DBPOWL.java | 42 +- .../socialnet/dbgen/vocabulary/DBPPROP.java | 65 -- .../ldbc/socialnet/dbgen/vocabulary/FOAF.java | 32 +- .../ldbc/socialnet/dbgen/vocabulary/RDF.java | 53 +- .../ldbc/socialnet/dbgen/vocabulary/RDFS.java | 51 +- .../ldbc/socialnet/dbgen/vocabulary/SN.java | 85 ++- .../socialnet/dbgen/vocabulary/SNVOC.java | 117 +-- .../ldbc/socialnet/dbgen/vocabulary/XSD.java | 58 +- .../src/main/resources/dictionaries/email.txt | 8 +- 48 files changed, 2058 insertions(+), 3582 deletions(-) delete mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/OutputDataWriter.java delete mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PhotoStream.java delete mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PostStream.java delete mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/storage/MFStoreManager.java delete mode 100644 ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPPROP.java diff --git a/ldbc_socialnet_dbgen/params.ini b/ldbc_socialnet_dbgen/params.ini index 57b7cd48a..b9714b38b 100644 --- a/ldbc_socialnet_dbgen/params.ini +++ b/ldbc_socialnet_dbgen/params.ini @@ -1,4 +1,4 @@ numtotalUser:10000 startYear:2010 numYears:1 -serializerType:ttl +serializerType:csv diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/BrowserDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/BrowserDictionary.java index c2c8a8e62..380d73f3d 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/BrowserDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/BrowserDictionary.java @@ -37,160 +37,106 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import 
java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.util.Random; import java.util.Vector; +/** + * This class reads the file containing the names and distributions for the browsers used in the ldbc socialnet generation and + * provides access methods to get such data. + */ public class BrowserDictionary { + private static final String SEPARATOR = " "; + + Vector vBrowser; Vector vBrowserCummulative; - Vector vBrowser; - Random randBrowsers; - BufferedReader browserDictionary; - String browserDicFileName; - - int totalNumBrowsers; + String fileName; - double probAnotherBrowser; // Probability that a user uses another browser - Random randDifBrowser; // whether user change to another browser or not + double probAnotherBrowser; + Random randDifBrowser; + Random randBrowsers; - public BrowserDictionary(String _browserDicFileName, long seedBrowser,double _probAnotherBrowser){ - randBrowsers = new Random(seedBrowser); - randDifBrowser = new Random(seedBrowser); - browserDicFileName = _browserDicFileName; - probAnotherBrowser = _probAnotherBrowser; + /** + * Creator. + * + * @param fileName: The file which contains the browser data. + * @param seedBrowser: Seed for the browsers random selection. + * @param probAnotherBrowser: Probability of the user using another browser. + */ + public BrowserDictionary(String fileName, long seedBrowser, double probAnotherBrowser){ + this.fileName = fileName; + this.probAnotherBrowser = probAnotherBrowser; + + randBrowsers = new Random(seedBrowser); + randDifBrowser = new Random(seedBrowser); } + /** + * Initializes the dictionary extracting the data from the file. 
+ */ public void init(){ try { - browserDictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(browserDicFileName), "UTF-8")); + BufferedReader dictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(fileName), "UTF-8")); vBrowser = new Vector(); vBrowserCummulative = new Vector(); - browsersExtract(); - - browserDictionary.close(); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - public void browsersExtract(){ - String browser; - double cumdistribution = 0.0; //cummulative distribution value - String line; - int i = 0; - totalNumBrowsers = 0; + String line; + double cummulativeDist = 0.0; - try { - while ((line = browserDictionary.readLine()) != null){ - String infos[] = line.split(" "); - browser = infos[0]; - cumdistribution = cumdistribution + Double.parseDouble(infos[1]); - vBrowser.add(browser); - //System.out.println(cumdistribution); - vBrowserCummulative.add(cumdistribution); - i++; - - totalNumBrowsers++; - } - + while ((line = dictionary.readLine()) != null){ + String data[] = line.split(SEPARATOR); + String browser = data[0]; + cummulativeDist += Double.parseDouble(data[1]); + vBrowser.add(browser); + vBrowserCummulative.add(cummulativeDist); + } + dictionary.close(); System.out.println("Done ... " + vBrowser.size() + " browsers were extracted"); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } - public String getRandomBrowser(){ - double prob = randBrowsers.nextDouble(); - - int minIdx = 0; - int maxIdx = totalNumBrowsers - 1; - - if (prob < vBrowserCummulative.get(minIdx)){ - return vBrowser.get(minIdx); - } - - while ((maxIdx - minIdx) > 1){ - - if (prob > vBrowserCummulative.get(minIdx + (maxIdx - minIdx)/2)){ - minIdx = minIdx + (maxIdx - minIdx)/2; - } - else{ - maxIdx = minIdx + (maxIdx - minIdx)/2; - } - } - - return vBrowser.get(maxIdx); - } + /** + * Gets the browser name. 
+ */ + public String getName(byte id) { + return vBrowser.get(id); + } - public String getBrowserName(byte browserId){ - return vBrowser.get(browserId); - } - public byte getRandomBrowserId(){ - double prob = randBrowsers.nextDouble(); + /** + * Gets a random browser id. + */ + public byte getRandomBrowserId() { + double prob = randBrowsers.nextDouble(); + int minIdx = 0; - int maxIdx = totalNumBrowsers - 1; - - if (prob < vBrowserCummulative.get(minIdx)){ - return (byte)minIdx; - } + int maxIdx = (prob < vBrowserCummulative.get(minIdx)) ? minIdx : vBrowserCummulative.size() - 1; - while ((maxIdx - minIdx) > 1){ + while ((maxIdx - minIdx) > 1) { - if (prob > vBrowserCummulative.get(minIdx + (maxIdx - minIdx)/2)){ - minIdx = minIdx + (maxIdx - minIdx)/2; - } - else{ - maxIdx = minIdx + (maxIdx - minIdx)/2; + int middlePoint = minIdx + (maxIdx - minIdx) / 2; + if (prob > vBrowserCummulative.get(middlePoint)) { + minIdx = middlePoint; + } else { + maxIdx = middlePoint; } } return (byte)maxIdx; } + /** + * Gets the post browser. There is a chance of being different from the user preferred browser + * @param userBrowserId: The user preferred browser. + */ public byte getPostBrowserId(byte userBrowserId){ double prob = randDifBrowser.nextDouble(); - if (prob < probAnotherBrowser){ - return getRandomBrowserId(); - } - else{ - return userBrowserId; - } - } - public byte getCommentBrowserId(byte userBrowserId){ - double prob = randDifBrowser.nextDouble(); - if (prob < probAnotherBrowser){ - return getRandomBrowserId(); - } - else{ - return userBrowserId; - } - } - - public String getBrowserForAUser(String originalBrowser){ - double prob = randDifBrowser.nextDouble(); - if (prob < probAnotherBrowser){ - return getRandomBrowser(); - } - else{ - return originalBrowser; - } + return (prob < probAnotherBrowser) ? 
getRandomBrowserId() : userBrowserId; } - - public Vector getvBrowser() { - return vBrowser; - } - - public void setvBrowser(Vector vBrowser) { - this.vBrowser = vBrowser; - } - } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/CompanyDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/CompanyDictionary.java index f77651f7e..7d7393069 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/CompanyDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/CompanyDictionary.java @@ -37,129 +37,118 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Random; import java.util.Vector; +/** + * This class reads the file containing the names and countries for the companies used in the ldbc socialnet generation and + * provides access methods to get such data. 
+ */ public class CompanyDictionary { - BufferedReader dictionary; - String dicFileName; - - HashMap locationNames; + + private static final String SEPARATOR = " "; + + String fileName; - Vector> companiesByLocations; HashMap companyLocation; - Random rand; - Random randUnRelatedCompany; - double probUnCorrelatedCompany; - Random randUnRelatedLocation; + HashMap> companiesByLocations; LocationDictionary locationDic; - public CompanyDictionary(String _dicFileName, LocationDictionary _locationDic, - long seedRandom, double _probUnCorrelatedCompany){ - this.locationNames = _locationDic.getLocationNameMapping(); - this.dicFileName = _dicFileName; - this.rand = new Random(seedRandom); - this.randUnRelatedCompany = new Random(seedRandom); - this.randUnRelatedLocation = new Random(seedRandom); - this.locationDic = _locationDic; - - this.probUnCorrelatedCompany = _probUnCorrelatedCompany; - } + Random rand; + Random randUnRelatedCompany; + Random randUnRelatedLocation; + double probUnCorrelatedCompany; - public void init(){ - try { - companyLocation = new HashMap(); - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); - - System.out.println("Building dictionary of companies (by locations)"); - - companiesByLocations = new Vector>(locationNames.size()); - for (int i = 0; i < locationNames.size(); i++){ - companiesByLocations.add(new Vector()); - } - - extractCompanyNames(); - - dictionary.close(); - - } catch (IOException e) { - e.printStackTrace(); - } + /** + * Constructor. + * + * @param fileName: The file with the company data. + * @param locationDic: The location dictionary. + * @param seed: Seed for the random selector. + * @param probUnCorrelatedCompany: Probability of selecting a country unrelated company. 
+ */ + public CompanyDictionary(String fileName, LocationDictionary locationDic, + long seed, double probUnCorrelatedCompany) { + + this.fileName = fileName; + this.locationDic = locationDic; + this.probUnCorrelatedCompany = probUnCorrelatedCompany; + + rand = new Random(seed); + randUnRelatedCompany = new Random(seed); + randUnRelatedLocation = new Random(seed); } - - public void extractCompanyNames(){ - String line; - String locationName; - String companyName; - String lastLocationName = ""; - int curLocationId = -1; - int totalNumCompanies = 0; - try { - while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(" "); - locationName = infos[0]; - if (locationName.compareTo(lastLocationName) != 0){ // New location - if (locationNames.containsKey(locationName)){ // Check whether it exists - lastLocationName = locationName; - curLocationId = locationNames.get(locationName); - companyName = infos[1].trim(); - companiesByLocations.get(curLocationId).add(companyName); - companyLocation.put(companyName, curLocationId); - totalNumCompanies++; - } - } else{ - companyName = infos[1].trim(); - companiesByLocations.get(curLocationId).add(companyName); - companyLocation.put(companyName, curLocationId); - totalNumCompanies++; - } - } - - System.out.println("Done ... " + totalNumCompanies + " companies were extracted"); - - } catch (IOException e) { - e.printStackTrace(); - } + /** + * Initializes the dictionary extracting the data from the file. 
+ */ + public void init() { + companyLocation = new HashMap(); + + companiesByLocations = new HashMap>(); + for (Integer id : locationDic.getCountries()){ + companiesByLocations.put(id, new Vector()); + } + + try { + BufferedReader dictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(fileName), "UTF-8")); + String line; + int previousId = -2; + int currentId = -1; + int totalNumCompanies = 0; + + while ((line = dictionary.readLine()) != null) { + String data[] = line.split(SEPARATOR); + String locationName = data[0]; + String companyName = data[1].trim(); + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION) { + if (currentId != previousId) { + currentId = locationDic.getCountryId(locationName); + previousId = currentId; + } + companiesByLocations.get(currentId).add(companyName); + companyLocation.put(companyName, currentId); + totalNumCompanies++; + } + } + dictionary.close(); + System.out.println("Done ... " + totalNumCompanies + " companies were extracted"); + + } catch (IOException e) { + e.printStackTrace(); + } } - - public HashMap getCompanyLocationMap() { - return companyLocation; + + /** + * Gets the company country id. + */ + public int getCountry(String company) { + return companyLocation.get(company); } - // Get a random company from that location - // if that location does not have any company, go to another location - public String getRandomCompany(int _locationId){ - String company = ""; - int randomCompanyIdx; - int locationId = _locationId; - - if (randUnRelatedCompany.nextDouble() > probUnCorrelatedCompany){ - while (companiesByLocations.get(locationId).size() == 0){ - locationId = randUnRelatedLocation.nextInt(locationNames.size()); - } + /** + * Gets a random company of the input country. In case the given country doesn't have any company + * a random one will be selected. + * @param countryId: A country id. 
+ */ + public String getRandomCompany(int countryId) { + int locId = countryId; - randomCompanyIdx = rand.nextInt(companiesByLocations.get(locationId).size()); - company = companiesByLocations.get(locationId).get(randomCompanyIdx); - return company; + Vector countries = locationDic.getCountries(); + if (randUnRelatedCompany.nextDouble() <= probUnCorrelatedCompany) { + locId = countries.get(randUnRelatedLocation.nextInt(countries.size())); } - else{ // Randomly select one company out of the location - int uncorrelateLocationIdx = randUnRelatedLocation.nextInt(locationNames.size()); - while (companiesByLocations.get(uncorrelateLocationIdx).size() == 0){ - uncorrelateLocationIdx = randUnRelatedLocation.nextInt(locationNames.size()); - } - - - randomCompanyIdx = rand.nextInt(companiesByLocations.get(uncorrelateLocationIdx).size()); - company = companiesByLocations.get(uncorrelateLocationIdx).get(randomCompanyIdx); - - return company; - } - + + // In case the country doesn't have any company select another country. 
+ while (companiesByLocations.get(locId).size() == 0){ + locId = countries.get(randUnRelatedLocation.nextInt(countries.size())); + } + + int randomCompanyIdx = rand.nextInt(companiesByLocations.get(locId).size()); + return companiesByLocations.get(locId).get(randomCompanyIdx); } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/EmailDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/EmailDictionary.java index a30a2f181..5b37b8ae7 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/EmailDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/EmailDictionary.java @@ -37,108 +37,97 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; -import java.util.HashMap; import java.util.Random; import java.util.Vector; -import ldbc.socialnet.dbgen.objects.Location; - - -// Dictionary of emails contains five top email domains which their popularities and list of 460 free emails +/** + * This class reads the file containing the email domain and its popularity used in the ldbc socialnet generation and + * provides access methods to get such data. + */ public class EmailDictionary { - Vector vTopEmailCummulative; - double randomFreeEmailCummulative; + + private static final String SEPARATOR = " "; + + Vector emails; + Vector topEmailCummulative; - Vector vEmail; - Random randEmail; - Random randIdx; - int numTopEmail = 5; + String fileName; - int totalNumEmail = 0; - BufferedReader emailDictionary; - String emailDicFileName; - public EmailDictionary(String _emailDicFileName, long seedEmail){ - randEmail = new Random(seedEmail); - randIdx = new Random(seedEmail); - emailDicFileName = _emailDicFileName; + Random randIdx; + Random randEmail; + + /** + * Constructor. 
+ * + * @param fileName: The file with the email data. + * @param seed: Seed for the random selector. + */ + public EmailDictionary(String fileName, long seed) { + this.fileName = fileName; + + randIdx = new Random(seed); + randEmail = new Random(seed); } + /** + * Initializes the dictionary with the file data. + */ public void init(){ try { - emailDictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(emailDicFileName), "UTF-8")); - vTopEmailCummulative = new Vector(); - vEmail = new Vector(); - - emailExtract(); - - emailDictionary.close(); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - public void emailExtract(){ - String emailDomain; - double cumdistribution = 0.0; //cummulative distribution value - String line; - int i = 0; - + BufferedReader emailDictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(fileName), "UTF-8")); + + emails = new Vector(); + topEmailCummulative = new Vector(); - try { + String line; + double cummulativeDist = 0.0; while ((line = emailDictionary.readLine()) != null){ - if (i < numTopEmail){ - String infos[] = line.split(" "); - emailDomain = infos[0]; - cumdistribution = cumdistribution + Double.parseDouble(infos[1]); - vEmail.add(emailDomain); - vTopEmailCummulative.add(cumdistribution); - i++; - } - else - vEmail.add(line); - - totalNumEmail++; + String data[] = line.split(SEPARATOR); + emails.add(data[0]); + if (data.length == 2) { +// System.out.println(line); + cummulativeDist += Double.parseDouble(data[1]); + topEmailCummulative.add(cummulativeDist); + } } - - System.out.println("Done ... " + vEmail.size() + " email domains were extracted"); + emailDictionary.close(); + System.out.println("Done ... " + emails.size() + " email domains were extracted"); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } + /** + * Gets a random email domain based on its popularity. 
+ */ public String getRandomEmail(){ - double prob = randEmail.nextDouble(); - int idx = 0; int minIdx = 0; - int maxIdx = numTopEmail - 1; - if (prob > vTopEmailCummulative.get(maxIdx)){ - //Randomly select one email from non-top email - idx = randIdx.nextInt(totalNumEmail - numTopEmail) + numTopEmail; - return vEmail.get(idx); - } - if (prob < vTopEmailCummulative.get(minIdx)){ - return vEmail.get(minIdx); + int maxIdx = topEmailCummulative.size() - 1; + + double prob = randEmail.nextDouble(); + if (prob > topEmailCummulative.get(maxIdx)){ + int Idx = randIdx.nextInt(emails.size() - topEmailCummulative.size()) + topEmailCummulative.size(); + return emails.get(Idx); + } else if (prob < topEmailCummulative.get(minIdx)){ + return emails.get(minIdx); } while ((maxIdx - minIdx) > 1){ - if (prob > vTopEmailCummulative.get(minIdx + (maxIdx - minIdx)/2)){ - minIdx = minIdx + (maxIdx - minIdx)/2; - } - else{ - maxIdx = minIdx + (maxIdx - minIdx)/2; + int middlePoint = minIdx + (maxIdx - minIdx) / 2; + if (prob > topEmailCummulative.get(middlePoint)){ + minIdx = middlePoint; + } else { + maxIdx = middlePoint; } } - return vEmail.get(maxIdx); + return emails.get(maxIdx); } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/IPAddressDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/IPAddressDictionary.java index 7fcb2d83b..1972a6532 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/IPAddressDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/IPAddressDictionary.java @@ -37,50 +37,49 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Random; import java.util.Vector; import ldbc.socialnet.dbgen.generator.DateGenerator; -import 
ldbc.socialnet.dbgen.objects.Comment; import ldbc.socialnet.dbgen.objects.IP; -import ldbc.socialnet.dbgen.objects.Location; -import ldbc.socialnet.dbgen.objects.Photo; -import ldbc.socialnet.dbgen.objects.Post; public class IPAddressDictionary { - Vector> vIPDic; // Store the IP ranges by the countries - - HashMap countryAbbreMap; - - Vector vLocation; + + private static final String SEPARATOR_COUNTRY = " "; + private static final String SEPARATOR_IP = "[.]"; + private static final String SEPARATOR_MASK = "/"; + private static final int MAX_IP_COUNTRY = 100; + + + + HashMap> ipsByCountry; + HashMap ipCountry; + + LocationDictionary locationDic; String mappingFileName; - String baseIPdir; - int maxNumIPRanges = 100; + String baseIPdir; - Random randIP; - double probDiffIPinTravelSeason; - double probDiffIPnotTravelSeason; - double probDiffIPforTraveller; - Random randDiffIP; - Random randDiffIPforTravellers; + Random randIP; + Random randDiffIP; + Random randDiffIPforTravellers; + double probDiffIPinTravelSeason; + double probDiffIPnotTravelSeason; + double probDiffIPforTraveller; - public IPAddressDictionary(String _mappingFileName, String _baseIPdir, Vector _vLocation, + public IPAddressDictionary(String _mappingFileName, String _baseIPdir, LocationDictionary locationDic, long seedIP, double _probDiffIPinTravelSeason, double _probDiffIPnotTravelSeason, double _probDiffIPforTraveller){ this.mappingFileName = _mappingFileName; this.baseIPdir = _baseIPdir; - countryAbbreMap = new HashMap(); - - vLocation = _vLocation; - vIPDic = new Vector>(); + this.locationDic = locationDic; + ipCountry = new HashMap(); + ipsByCountry = new HashMap>(); probDiffIPinTravelSeason = _probDiffIPinTravelSeason; probDiffIPnotTravelSeason = _probDiffIPnotTravelSeason; @@ -91,263 +90,104 @@ public IPAddressDictionary(String _mappingFileName, String _baseIPdir, Vector()); - - //Get the name of file - String fileName = countryAbbreMap.get(vLocation.get(i).getName()); - fileName = baseIPdir + 
"/" + fileName + ".zone"; - - int j = 0; - try { - BufferedReader ipZoneFile = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(fileName), "UTF-8")); - - //System.out.println(fileName); - - while ((line = ipZoneFile.readLine()) != null){ - IPRange iprange = new IPRange(); - line = line.replace(".", " "); - String infos[] = line.split(" "); - //System.out.println(line); - iprange.setIp1(Short.parseShort(infos[0])); - iprange.setIp2(Short.parseShort(infos[1])); - iprange.setIp3(Short.parseShort(infos[2])); - - String ranges[] = infos[3].split("/"); - - short ip4first = Short.parseShort(ranges[0]); - short ip4second = Short.parseShort(ranges[1]); - if (ip4first < ip4second){ - iprange.setIp4start(ip4first); - iprange.setIp4end(ip4second); - } - else{ - iprange.setIp4end(ip4first); - iprange.setIp4start(ip4second); - } - - - - vIPDic.get(i).add(iprange); - - j++; - if (j == maxNumIPRanges) break; - } - - ipZoneFile.close(); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - } - - public void checkCountryNameExistence(){ - for (int i = 0; i < vLocation.size(); i ++){ - String countryName = vLocation.get(i).getName(); - if (!countryAbbreMap.containsKey(countryName)){ - System.out.println("Country " + countryName + " is not in the mapping file"); - } - } - } - - public int getLocation(IP ip) { - for (int i = 0; i < vIPDic.size(); i ++) { - for (int j = 0; j < vIPDic.get(i).size(); j++) { - IPRange target = vIPDic.get(i).get(j); - if (ip.getIp1() == target.getIp1() && ip.getIp2() == target.getIp2() && - ip.getIp3() == target.getIp3() && ip.getIp4() >= target.getIp4start() && - ip.getIp4() <= target.getIp4end()) { - return i; - } + public void initialize() { + String line; + HashMap countryAbbreMap = new HashMap(); + try { + BufferedReader mappingFile = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(mappingFileName), "UTF-8")); + while ((line = mappingFile.readLine()) != 
null){ + String data[] = line.split(SEPARATOR_COUNTRY); + String abbr = data[0]; + String countryName = data[1].trim().replace(" ", "_"); + countryAbbreMap.put(countryName, abbr); } + mappingFile.close(); + + Vector countries = locationDic.getCountries(); + for (int i = 0; i < countries.size(); i ++) { + ipsByCountry.put(countries.get(i), new Vector()); + + //Get the name of file + String fileName = countryAbbreMap.get(locationDic.getLocationName(countries.get(i))); + fileName = baseIPdir + "/" + fileName + ".zone"; + BufferedReader ipZoneFile = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(fileName), "UTF-8")); + + int j = 0; + while ((line = ipZoneFile.readLine()) != null && (j < MAX_IP_COUNTRY)) { + String data[] = line.split(SEPARATOR_IP); + String maskData[] = data[3].split(SEPARATOR_MASK); + int byte1 = Integer.valueOf(data[0]); + int byte2 = Integer.valueOf(data[1]); + int byte3 = Integer.valueOf(data[2]); + int byte4 = Integer.valueOf(maskData[0]); + int maskNum = Integer.valueOf(maskData[1]); + + IP ip = new IP(byte1, byte2, byte3, byte4, maskNum); + + ipsByCountry.get(i).add(ip); + ipCountry.put(ip.getIp() & ~ip.getMask(), i); + j++; + } + ipZoneFile.close(); + } + } catch (IOException e) { + e.printStackTrace(); } - return -1; + } + + public int getLocation(IP ip) { + int network = ip.getIp() & ~ip.getMask(); + return (ipCountry.containsKey(network)) ? 
ipCountry.get(network) : -1; } - public IP getRandomIPAddressFromLocation(int locationIdx){ - Vector countryIPs = vIPDic.get(locationIdx); + public IP getRandomIPFromLocation(int locationIdx) { + Vector countryIPs = ipsByCountry.get(locationIdx); int idx = randIP.nextInt(countryIPs.size()); - IPRange iprange = countryIPs.get(idx); - short ip4 =-1; - if (iprange.ip4end == iprange.ip4start) ip4 = iprange.ip4start; - else{ - try { - ip4 = (short)(randIP.nextInt(iprange.ip4end - iprange.ip4start) + iprange.ip4start); - } catch (Exception e) { - System.out.println(" iprange.ip4end = " + iprange.ip4end); - System.out.println(" iprange.ip4start = " + iprange.ip4start); - e.printStackTrace(); - System.exit(-1); - } - } + IP networkIp = countryIPs.get(idx); - IP ip = new IP(iprange.ip1,iprange.ip2,iprange.ip3,ip4); - return ip; - } - public IP getRandomIP(){ - int randomLocationIdx = randIP.nextInt(vLocation.size()); - return getRandomIPAddressFromLocation(randomLocationIdx); - } - - // Only 1% users (i.e., users with frequent-change property) can have variation of IP address - // Other users have the IP addresses correlated with the location - // For these 1% users, probability that they have different IP address is 0.5 - - public void setPostIPAdress(boolean isFrequentChange, IP ipAdress, Post post){ + int formattedIP = 0; + int ip = networkIp.getIp(); + int mask = networkIp.getMask(); - // Check whether the posting time is on a vacation season - if (isFrequentChange ) - { - if (randDiffIPforTravellers.nextDouble() < probDiffIPforTraveller){ - post.setIpAddress(getRandomIP()); - } - } - else{ - // check whether it is a travel season - if (DateGenerator.isTravelSeason(post.getCreatedDate())){ - if (randDiffIP.nextDouble() < probDiffIPinTravelSeason){ - post.setIpAddress(getRandomIP()); - return; - } - } - else{ - if (randDiffIP.nextDouble() < probDiffIPnotTravelSeason){ - post.setIpAddress(getRandomIP()); - return; - } - } - } + for (int i = 0; i < IP.IP4_SIZE_BYTES; i++) 
{ + int randomRange = ((mask >>> (IP.BYTE_SIZE * i)) & 0xFF) + 1; + int base = ((ip >>> (IP.BYTE_SIZE * i)) & 0xFF) + randIP.nextInt(randomRange); + formattedIP = formattedIP | (base << IP.BYTE_SIZE * i); + } - post.setIpAddress(ipAdress); + return new IP(formattedIP, mask); } - public void setCommentIPAdress(boolean isFrequentChange, IP ipAdress, Comment comment){ - - // Check whether the posting time is on a vacation season - if (isFrequentChange ) - { - if (randDiffIPforTravellers.nextDouble() < probDiffIPforTraveller){ - comment.setIpAddress(getRandomIP()); - } - } - else{ - // check whether it is a travel season - if (DateGenerator.isTravelSeason(comment.getCreateDate())){ - if (randDiffIP.nextDouble() < probDiffIPinTravelSeason){ - comment.setIpAddress(getRandomIP()); - return; - } - } - else{ - if (randDiffIP.nextDouble() < probDiffIPnotTravelSeason){ - comment.setIpAddress(getRandomIP()); - return; - } - } - } - - comment.setIpAddress(ipAdress); + public IP getRandomIP() { + Vector countries = locationDic.getCountries(); + int randomLocationIdx = randIP.nextInt(countries.size()); + return getRandomIPFromLocation(randomLocationIdx); } - public void setPhotoIPAdress(boolean isFrequentChange, IP ipAdress, Photo photo){ - - // Check whether the posting time is on a vacation season - if (isFrequentChange ) - { - if (randDiffIPforTravellers.nextDouble() < probDiffIPforTraveller){ - photo.setIpAddress(getRandomIPAddressFromLocation(photo.getLocationIdx())); - } - } - else{ - // check whether it is a travel season - if (DateGenerator.isTravelSeason(photo.getTakenTime())){ - if (randDiffIP.nextDouble() < probDiffIPinTravelSeason){ - photo.setIpAddress(getRandomIPAddressFromLocation(photo.getLocationIdx())); - return; - } - } - else{ - if (randDiffIP.nextDouble() < probDiffIPnotTravelSeason){ - photo.setIpAddress(getRandomIPAddressFromLocation(photo.getLocationIdx())); - return; - } - } - } - - photo.setIpAddress(ipAdress); + private boolean changeUsualIp(boolean 
isFrequentChange, long date) { + boolean change = false; + if (isFrequentChange) { + if (randDiffIPforTravellers.nextDouble() < probDiffIPforTraveller) { + change = true; + } + } else { + if (DateGenerator.isTravelSeason(date)) { + if (randDiffIP.nextDouble() < probDiffIPinTravelSeason) { + change = true; + } + } else if (randDiffIP.nextDouble() < probDiffIPnotTravelSeason) { + change = true; + } + } + return change; } -} - -class IPRange{ - short ip1; - short ip2; - short ip3; - short ip4start; - short ip4end; - - public short getIp1() { - return ip1; - } - public void setIp1(short ip1) { - this.ip1 = ip1; - } - public short getIp2() { - return ip2; - } - public void setIp2(short ip2) { - this.ip2 = ip2; - } - public short getIp3() { - return ip3; - } - public void setIp3(short ip3) { - this.ip3 = ip3; - } - public short getIp4start() { - return ip4start; - } - public void setIp4start(short ip4start) { - this.ip4start = ip4start; - } - public short getIp4end() { - return ip4end; - } - public void setIp4end(short ip4end) { - this.ip4end = ip4end; + public IP getIP(IP ip, boolean isFrequentChange, long date) { + return (changeUsualIp(isFrequentChange, date)) ? new IP(ip.getIp(), ip.getMask()) : getRandomIP(); } + + public IP getIP(IP ip, boolean isFrequentChange, long date, int countryId) { + return (changeUsualIp(isFrequentChange, date)) ? 
new IP(ip.getIp(), ip.getMask()) : getRandomIPFromLocation(countryId); + } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LanguageDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LanguageDictionary.java index 86a4877f1..5d7bf45e5 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LanguageDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LanguageDictionary.java @@ -37,44 +37,33 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; -import java.io.PrintStream; -import java.io.RandomAccessFile; -import java.io.UnsupportedEncodingException; -import java.lang.reflect.Array; -import java.util.Arrays; import java.util.HashMap; -import java.util.TreeSet; -import java.util.Iterator; import java.util.Random; import java.util.Vector; -import ldbc.socialnet.dbgen.dictionary.NamesDictionary.NameFreq; -import ldbc.socialnet.dbgen.objects.Location; -import ldbc.socialnet.dbgen.util.ZOrder; - public class LanguageDictionary { + private static final String SEPARATOR = " "; + private static final String ISO_ENGLISH_CODE = "en"; + Vector languages; - Vector> officalLanguagesFromCountries; - Vector> languagesFromCountries; + HashMap> officalLanguagesFromCountries; + HashMap> languagesFromCountries; - HashMap countryNames; - BufferedReader dictionary; + LocationDictionary locationDic; String dicFile; double probEnglish; double probSecondLang; Random rand; - public LanguageDictionary(String dicFile, HashMap countryNames, + public LanguageDictionary(String dicFile, LocationDictionary locationDic, double probEnglish, double probSecondLang, long seed){ this.dicFile = dicFile; - this.countryNames = countryNames; + this.locationDic = locationDic; this.probEnglish = probEnglish; this.probSecondLang = 
probSecondLang; @@ -84,53 +73,41 @@ public LanguageDictionary(String dicFile, HashMap countryNames public void init(){ try { languages = new Vector(); - officalLanguagesFromCountries = new Vector>(); - languagesFromCountries = new Vector>(); - for (int i = 0; i < countryNames.size(); i++) { - officalLanguagesFromCountries.add(new Vector()); - languagesFromCountries.add(new Vector()); + officalLanguagesFromCountries = new HashMap>(); + languagesFromCountries = new HashMap>(); + for (Integer id : locationDic.getCountries()) { + officalLanguagesFromCountries.put(id, new Vector()); + languagesFromCountries.put(id, new Vector()); } - - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFile), "UTF-8")); - extractLanguages(); + BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFile), "UTF-8")); + + String line; + while ((line = dictionary.readLine()) != null) { + String data[] = line.split(SEPARATOR); + if (locationDic.getCountryId(data[0]) != LocationDictionary.INVALID_LOCATION) { + for (int i = 1; i < data.length; i++) { + Integer countryId = locationDic.getCountryId(data[0]); + String languageData[] = data[i].split(" "); + Integer id = languages.indexOf(languageData[0]); + if (id == -1) { + id = languages.size(); + languages.add(languageData[0]); + } + if (languageData.length == 3) { + officalLanguagesFromCountries.get(countryId).add(id); + } + languagesFromCountries.get(countryId).add(id); + } + } + } + dictionary.close(); - dictionary.close(); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } - public void extractLanguages() - { - try { - String line; - while ((line = dictionary.readLine()) != null) { - String splitted[] = line.split(" "); - if (countryNames.containsKey(splitted[0])) { - for (int i = 1; i < splitted.length; i++) { - Integer countryId = countryNames.get(splitted[0]); - String languageData[] = splitted[i].split(" "); - 
Integer id = languages.indexOf(languageData[0]); - if (id == -1) { - id = languages.size(); - languages.add(languageData[0]); - } - if (languageData.length == 3) { - officalLanguagesFromCountries.get(countryId).add(id); - } - languagesFromCountries.get(countryId).add(id); - } - } - } - System.out.println("Extracted " + languages.size() + " languages"); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - public String getLanguagesName(int languageId) { if (languageId < 0 || languageId >= languages.size()) { System.err.println("Trying to acces the invalid language with id="+languageId); @@ -148,8 +125,8 @@ public Vector getLanguages(int locationId) { int id = rand.nextInt(languagesFromCountries.get(locationId).size()); langSet.add(languagesFromCountries.get(locationId).get(id)); } - double prob = rand.nextDouble(); - if (prob < probSecondLang) { + + if (rand.nextDouble() < probSecondLang) { int id = rand.nextInt(languagesFromCountries.get(locationId).size()); if (langSet.indexOf(languagesFromCountries.get(locationId).get(id)) == -1) { langSet.add(languagesFromCountries.get(locationId).get(id)); @@ -158,11 +135,11 @@ public Vector getLanguages(int locationId) { return langSet; } + public Integer getInternationlLanguage() { Integer languageId = -1; - double prob = rand.nextDouble(); - if (prob < probEnglish) { - languageId = languages.indexOf("en"); + if (rand.nextDouble() < probEnglish) { + languageId = languages.indexOf(ISO_ENGLISH_CODE); } return languageId; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java index 2eb882ffe..b942e76ff 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java @@ -37,497 +37,352 @@ package 
ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; -import java.io.PrintStream; -import java.io.RandomAccessFile; -import java.io.UnsupportedEncodingException; -import java.lang.reflect.Array; import java.util.Arrays; import java.util.HashMap; import java.util.Random; import java.util.Vector; -import ldbc.socialnet.dbgen.dictionary.NamesDictionary.NameFreq; import ldbc.socialnet.dbgen.objects.Location; import ldbc.socialnet.dbgen.util.ZOrder; - +/** + * This class reads the files containing the country data and city data used in the ldbc socialnet generation and + * provides access methods to get such data. + * Most of the users has the prerequisite of requiring a valid location id. + */ public class LocationDictionary { - int numberOfUsers; - Vector vecLocationDistribution; // Store the number of people in each location - - Vector vecCountry; // Countries - Vector vecCities; // Cities - Vector vecContinents; // Continents - Location earth; - HashMap cityCountry; // cityId -> countryId - HashMap countryContinent; //countryId -> continentId - Vector> vecCountryCities; // countryId -> cityId - Vector> vecContinentCountries; // continentIds -> countyId - - BufferedReader dictionary; - String dicCountryFile; - String dicCityFile; - HashMap countryNameMapping; //Mapping from a Country location name to a id - HashMap citiesNameMapping; //Mapping from a city location name to a id - HashMap continentsNameMapping; //Mapping from a continent location name to a id - - boolean isCummulativeDist = false; // Store the vecLocationDistribution according to cumulative values - int countNumOfSameLocation = 0; - int curLocationIdx = 0; + public static final int INVALID_LOCATION = -1; + private static final String SEPARATOR = " "; + private static final String SEPARATOR_CITY = " "; + + int numUsers; + int curLocationIdx; + Random rand; 
LocationZorder[] sortLocation; + Vector locationDistribution; + + String cityFile; + String countryFile; + + Vector countries; + HashMap locations; + HashMap isPartOf; + HashMap> citiesFromCountry; - Random rand; + HashMap cityNames; + HashMap countryNames; - public LocationDictionary(int _numberOfUsers, long seed, String dicCountryFile, String dicCityFile){ - this.numberOfUsers = _numberOfUsers; - this.dicCountryFile = dicCountryFile; - this.dicCityFile = dicCityFile; + /** + * Private class used to sort countries by their z-order value. + */ + private class LocationZorder implements Comparable { - rand = new Random(seed); + public int id; + public Integer zvalue; + + public LocationZorder(int id, int zvalue) { + this.id = id; + this.zvalue = zvalue; + } + + public int compareTo(LocationZorder obj) { + return zvalue.compareTo(obj.zvalue); + } } - public HashMap getLocationNameMapping() { - return countryNameMapping; - } - - public void setLocationNameMapping(HashMap locationNameMapping) { - this.countryNameMapping = locationNameMapping; - } + /** + * Creator. + * + * @param numUsers: The total number of users. + * @param seed: The random selector seed. + * @param countryFile: The country and continent data file. + * @param cityFile: The city data file. + */ + public LocationDictionary(int numUsers, long seed, String countryFile, String cityFile){ + this.numUsers = numUsers; + this.countryFile = countryFile; + this.cityFile = cityFile; + + rand = new Random(seed); + } - public Vector getVecLocationDistribution() { - return vecLocationDistribution; - } - public void setVecLocationDistribution(Vector vecLocationDistribution) { - this.vecLocationDistribution = vecLocationDistribution; - } - - public Vector getVecLocations() { - return vecCountry; - } - - public void setVecLocations(Vector vecLocations) { - this.vecCountry = vecLocations; + /** + * Gets a list of the country ids. 
+ */ + public Vector getCountries() { + return new Vector(countries); } - public String getLocationName(int locationIdx){ - if (locationIdx < 0 || locationIdx >= (vecCountry.size() + vecCities.size() + vecContinents.size() + 1)) - { - System.out.println("Invalid locationId"); - return ""; - } else if (locationIdx < vecCountry.size()) { - return vecCountry.get(locationIdx).getName(); - } else if (locationIdx < vecCountry.size() + vecCities.size()) { - return vecCities.get(locationIdx - vecCountry.size()).getName(); - } else if (locationIdx < vecCountry.size() + vecCities.size() + vecContinents.size()){ - return vecContinents.get(locationIdx - vecCities.size() - vecCountry.size()).getName(); - } else { - return earth.getName(); - } + /** + * Given a location id returns the name of said location. + */ + public String getLocationName(int locationId) { + return locations.get(locationId).getName(); } - public String getType(int locationIdx){ - if (locationIdx < 0 || locationIdx >= (vecCountry.size() + vecCities.size() + vecContinents.size() + 1)) - { - System.out.println("Invalid locationId"); - return ""; - } else if (locationIdx < vecCountry.size()) { - return vecCountry.get(locationIdx).getType(); - } else if (locationIdx < vecCountry.size() + vecCities.size()) { - return vecCities.get(locationIdx - vecCountry.size()).getType(); - } else if (locationIdx < vecCountry.size() + vecCities.size() + vecContinents.size()){ - return vecContinents.get(locationIdx - vecCities.size() - vecCountry.size()).getType(); - } else { - return earth.getType(); - } + + /** + * Given a location id returns the population of said location. 
+ */ + public Long getPopulation(int locationId) { + return locations.get(locationId).getPopulation(); } - public double getLatt(int locationIdx){ - if (locationIdx < 0 || locationIdx >= (vecCountry.size() + vecCities.size() + vecContinents.size() + 1)) - { - System.out.println("Invalid locationId"); - return 0; - } else if (locationIdx < vecCountry.size()) { - return vecCountry.get(locationIdx).getLatt(); - } else if (locationIdx < vecCountry.size() + vecCities.size()) { - return vecCities.get(locationIdx - vecCountry.size()).getLatt(); - } else if (locationIdx < vecCountry.size() + vecCities.size() + vecContinents.size()){ - return vecContinents.get(locationIdx - vecCities.size() - vecCountry.size()).getLatt(); - } else { - return earth.getLatt(); - } - } - public double getLongt(int locationIdx){ - if (locationIdx < 0 || locationIdx >= (vecCountry.size() + vecCities.size() + vecContinents.size() + 1)) { - System.out.println("Invalid locationId"); - return 0; - } else if (locationIdx < vecCountry.size()) { - return vecCountry.get(locationIdx).getLongt(); - } else if (locationIdx < vecCountry.size() + vecCities.size()) { - return vecCities.get(locationIdx - vecCountry.size()).getLongt(); - } else if (locationIdx < vecCountry.size() + vecCities.size() + vecContinents.size()){ - return vecContinents.get(locationIdx - vecCities.size() - vecCountry.size()).getLongt(); - } else { - return earth.getLongt(); - } - } - public String getCountryName(int countryId) { - if (countryId < 0 || countryId >= vecCountry.size()) - { - System.out.println("Invalid countryId"); - return ""; - } - - return vecCities.get(countryId).getName(); + /** + * Given a location id returns the Type ({@link ldbc.socialnet.dbgen.objects.Location#CITY} | + * {@link ldbc.socialnet.dbgen.objects.Location#COUNTRY} | {@link ldbc.socialnet.dbgen.objects.Location#CONTINENT} | + * {@link ldbc.socialnet.dbgen.objects.Location#AREA}) of said location. 
+ */ + public String getType(int locationId) { + return locations.get(locationId).getType(); + } + + /** + * Given a location id returns the latitude of said location. + */ + public double getLatt(int locationId) { + return locations.get(locationId).getLatt(); } - public String getCityName(int cityId) { - if (cityId < vecCountry.size() || cityId >= (vecCountry.size() + vecCities.size())) - { - System.out.println("Invalid cityId"); - return ""; - } - - return vecCities.get(cityId - vecCountry.size()).getName(); + /** + * Given a location id returns the longitude of said location. + */ + public double getLongt(int locationId) { + return locations.get(locationId).getLongt(); } - public int getCityId(String cityName) { - if (!citiesNameMapping.containsKey(cityName)) - { - System.out.println("Invalid cityId"); - return -1; + /** + * Given a city name returns the id of the city or {{@link #INVALID_LOCATION} if it does not exist. + */ + public int getCityId(String cityName) { + if (!cityNames.containsKey(cityName)) { + return INVALID_LOCATION; } - - return citiesNameMapping.get(cityName) + vecCountry.size(); + return cityNames.get(cityName); + } + + /** + * Given a country name returns the id of the country or {{@link #INVALID_LOCATION} if it does not exist. + */ + public int getCountryId(String countryName) { + if (!countryNames.containsKey(countryName)) { + return INVALID_LOCATION; + } + return countryNames.get(countryName); } + /** + * Given a location id returns the id of the location which the input is part of or + * {{@link #INVALID_LOCATION} if it does not exist any. 
+ */ public int belongsTo(int locationId) { - if (locationId < 0 || locationId >= (vecCountry.size() + vecCities.size())) { - return -1; - } else if (locationId < vecCountry.size() && countryContinent.containsKey(locationId)) { - return vecContinents.get(countryContinent.get(locationId)).getId(); - } else if (locationId < vecCountry.size() + vecCities.size() && cityCountry.containsKey(locationId-vecCountry.size())) { - return vecCountry.get(cityCountry.get(locationId-vecCountry.size())).getId(); + if (!isPartOf.containsKey(locationId)) { + return INVALID_LOCATION; } - return earth.getId(); + return isPartOf.get(locationId); } + /** + * Given a country id returns an id of one of its cities. + */ public int getRandomCity(int countryId) { - if (countryId < 0 || countryId >= vecCountry.size()) - { - System.out.println("Invalid countryId"); - return -1; - } - if (vecCountryCities.get(countryId).size() == 0) - { - System.out.println("Country with no known cities"); - return -1; + if (!citiesFromCountry.containsKey(countryId)) { + System.err.println("Invalid countryId"); + return INVALID_LOCATION; } - - int randomNumber = rand.nextInt(vecCountryCities.get(countryId).size()); - return vecCountryCities.get(countryId).get(randomNumber) + vecCountry.size(); - } - - public int getContinent(int countryId) { - if (countryId < 0 || countryId >= vecCountry.size() || !countryContinent.containsKey(countryId)) - { - System.err.println("Invalid countryId"); - return -1; - } - - return countryContinent.get(countryId) + vecCountry.size() + vecCities.size(); - } - - public String getContinentName(int continentId) - { - if (continentId < (vecCountry.size() + vecCities.size()) - || continentId >= vecCountry.size() + vecCities.size() + vecContinents.size()) - { - System.err.println("Invalid continentId"); - return ""; + if (citiesFromCountry.get(countryId).size() == 0) { + System.err.println("Country with no known cities"); + return INVALID_LOCATION; } - return vecContinents.get(continentId 
- vecCountry.size() - vecCities.size()).getName(); - } + int randomNumber = rand.nextInt(citiesFromCountry.get(countryId).size()); + return citiesFromCountry.get(countryId).get(randomNumber); + } - public void init(){ - try { - vecLocationDistribution = new Vector(); - vecCountry = new Vector(); - vecCities = new Vector(); - vecContinents = new Vector(); - countryNameMapping = new HashMap(); - citiesNameMapping = new HashMap(); - continentsNameMapping = new HashMap(); - vecCountryCities = new Vector>(); - vecContinentCountries = new Vector>(); - countryContinent = new HashMap(); - cityCountry = new HashMap(); - - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicCountryFile), "UTF-8")); + /** + * Initializes the dictionary. + */ + public void init() { + curLocationIdx = 0; + locationDistribution = new Vector(); + countryNames = new HashMap(); + cityNames = new HashMap(); + locations = new HashMap(); + isPartOf = new HashMap(); + countries = new Vector(); + citiesFromCountry = new HashMap>(); - extractLocationsCummulative(); - - orderByZ(); - - dictionary.close(); - - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicCityFile), "UTF-8")); - - extractCities(); - - dictionary.close(); - - for (int i = 0; i < vecContinents.size(); i++) { - vecContinents.get(i).setId(vecCountry.size() + vecCities.size() + i); - } - - earth = new Location(); - earth.setId(vecCountry.size() + vecCities.size() + vecContinents.size()); - earth.setName("Earth"); - earth.setLatt(0); - earth.setLongt(0); - earth.setPopulation(7000000000L); - earth.setType("AREA"); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + readCountries(); + orderByZ(); + readCities(); + readContinents(); } - public void extractCities() - { + /** + * Reads the city data from the file. + * It only stores the cities which belonging to a known country. 
+ */ + private void readCities() { try { + BufferedReader dictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(cityFile), "UTF-8")); + + int cities = 0; String line; while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(" "); - if (countryNameMapping.containsKey(infos[0])) { - Integer countryId = countryNameMapping.get(infos[0]); - if (!citiesNameMapping.containsKey(infos[2])) { + String data[] = line.split(SEPARATOR_CITY); + if (countryNames.containsKey(data[0])) { + Integer countryId = countryNames.get(data[0]); + if (!cityNames.containsKey(data[2])) { Location location = new Location(); - location.setId(vecCountry.size() + vecCities.size()); - location.setName(infos[2]); - location.setLatt(vecCountry.get(countryId).getLatt()); - location.setLongt(vecCountry.get(countryId).getLongt()); + location.setId(locations.size()); + location.setName(data[2]); + location.setLatt(locations.get(countryId).getLatt()); + location.setLongt(locations.get(countryId).getLongt()); location.setPopulation(-1); location.setType(Location.CITY); - citiesNameMapping.put(infos[2], vecCities.size()); - vecCities.add(location); + locations.put(location.getId(), location); + isPartOf.put(location.getId(), countryId); + citiesFromCountry.get(countryId).add(location.getId()); + + cityNames.put(data[2], location.getId()); - Integer cityId = citiesNameMapping.get(infos[2]); - vecCountryCities.get(countryId).add(cityId); - cityCountry.put(cityId, countryId); + cities++; } - } else { - //System.err.println("Unknown country " + infos[0] + " for city " + infos[2]); } } + dictionary.close(); + System.out.println("Done ... 
" + cities + " cities were extracted"); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } - - public void extractLocationsCummulative(){ - String locationName; - float cumdistribution; //cummulative distribution value - String line; - - isCummulativeDist = true; - - try { - while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(" "); - locationName = infos[1]; - countryNameMapping.put(locationName,vecCountry.size()); - - Location location = new Location(); - location.setId(vecCountry.size()); - location.setName(locationName); - location.setLatt(Double.parseDouble(infos[2])); - location.setLongt(Double.parseDouble(infos[3])); - location.setPopulation(Integer.parseInt(infos[4])); - location.setType(Location.COUNTRY); - - vecCountry.add(location); - vecCountryCities.add(new Vector()); - - cumdistribution = Float.parseFloat(infos[5]); - vecLocationDistribution.add(Math.round(cumdistribution*(float)numberOfUsers)); + /** + * Reads the countries data from the file. 
+ */ + private void readCountries() { + try { + BufferedReader dictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(countryFile), "UTF-8")); - if (!continentsNameMapping.containsKey(infos[0])) { - - vecContinentCountries.add(new Vector()); - Location continent = new Location(); - continent.setId(0); //Defined later - continent.setName(infos[0]); - continent.setLatt(Double.parseDouble(infos[2])); - continent.setLongt(Double.parseDouble(infos[3])); - continent.setPopulation(0); - continent.setType(Location.CONTINENT); + String line; + while ((line = dictionary.readLine()) != null){ + String data[] = line.split(SEPARATOR); + String locationName = data[1]; - continentsNameMapping.put(infos[0], vecContinents.size()); - vecContinents.add(continent); - } + Location location = new Location(); + location.setId(locations.size()); + location.setName(locationName); + location.setLatt(Double.parseDouble(data[2])); + location.setLongt(Double.parseDouble(data[3])); + location.setPopulation(Integer.parseInt(data[4])); + location.setType(Location.COUNTRY); - Integer continentId = continentsNameMapping.get(infos[0]); - Integer countryId = countryNameMapping.get(infos[1]); - countryContinent.put(countryId, continentId); - vecContinentCountries.get(continentId).add(countryId); - vecContinents.get(continentId).setPopulation(vecContinents.get(continentId).getPopulation() + vecCountry.get(countryId).getPopulation()); - } - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - - System.out.println("Done ... 
" + vecLocationDistribution.size() + " locations were extracted"); - //Recalculate the number of people for each locations + locations.put(location.getId(), location); + countryNames.put(locationName, location.getId()); + float cummulativeDistribution = Float.parseFloat(data[5]); + locationDistribution.add(Math.round(cummulativeDistribution * (float)numUsers)); + countries.add(location.getId()); + + citiesFromCountry.put(location.getId(), new Vector()); + } + dictionary.close(); + System.out.println("Done ... " + countries.size() + " countries were extracted"); + } catch (IOException e) { + e.printStackTrace(); + } } - public void extractLocations(){ - String locationName; - float cumdistribution; //cummulative distribution value - String line; - int total = 0; - int lasttotal = 0; - - isCummulativeDist = false; - - try { - while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(" "); - locationName = infos[1]; - cumdistribution = Integer.parseInt(infos[4]); - - Location location = new Location(); - location.setId(vecCountry.size()); - location.setName(locationName); - location.setLatt(Double.parseDouble(infos[1])); - location.setLongt(Double.parseDouble(infos[2])); - location.setPopulation(Integer.parseInt(infos[3])); - location.setType("COUNTRY"); - - vecCountry.add(location); - vecCountryCities.add(new Vector()); - - total = Math.round(cumdistribution*(float)numberOfUsers); - vecLocationDistribution.add(total - lasttotal); - lasttotal = total; - - if (!continentsNameMapping.containsKey(infos[0])) { - vecContinentCountries.add(new Vector()); + + /** + * Reads the continent data from the file and links a country to a continent. 
+ */ + private void readContinents() { + HashMap treatedContinents = new HashMap(); + try { + BufferedReader dictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(countryFile), "UTF-8")); + + String line; + while ((line = dictionary.readLine()) != null){ + String data[] = line.split(SEPARATOR); + String locationName = data[1]; + + int countryId = countryNames.get(locationName); + + if (!treatedContinents.containsKey(data[0])) { + Location continent = new Location(); - continent.setId(0); //Defined later - continent.setName(infos[0]); - continent.setLatt(Double.parseDouble(infos[2])); - continent.setLongt(Double.parseDouble(infos[3])); + continent.setId(locations.size()); + continent.setName(data[0]); + continent.setLatt(Double.parseDouble(data[2])); + continent.setLongt(Double.parseDouble(data[3])); continent.setPopulation(0); - continent.setType("CONTINENT"); + continent.setType(Location.CONTINENT); - continentsNameMapping.put(infos[0], vecContinents.size()); - vecContinents.add(continent); + locations.put(continent.getId(), continent); + treatedContinents.put(data[0], continent.getId()); } - - Integer continentId = continentsNameMapping.get(infos[0]); - Integer countryId = countryNameMapping.get(infos[1]); - countryContinent.put(countryId, continentId); - vecContinentCountries.get(continentId).add(countryId); - vecContinents.get(continentId).setPopulation(vecContinents.get(continentId).getPopulation() + vecCountry.get(countryId).getPopulation()); - } - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - - System.out.println(vecLocationDistribution.size() + " locations were extracted"); - //Recalculate the number of people for each locations - } + Integer continentId = treatedContinents.get(data[0]); + long population = locations.get(continentId).getPopulation() + locations.get(countryId).getPopulation(); + locations.get(continentId).setPopulation(population); + isPartOf.put(countryId, 
continentId); + } + dictionary.close(); + System.out.println("Done ... " + treatedContinents.size() + " continents were extracted"); + } catch (IOException e) { + e.printStackTrace(); + } + } - public int getLocation(int userIdx){ - if (isCummulativeDist){ - if (userIdx < vecLocationDistribution.get(curLocationIdx)) return curLocationIdx; - else - { - curLocationIdx++; - return curLocationIdx; - } - - } - else{ - if (countNumOfSameLocation < vecLocationDistribution.get(curLocationIdx)){ - countNumOfSameLocation++; - return curLocationIdx; - } - else{ - countNumOfSameLocation = 0; - curLocationIdx++; - return curLocationIdx; - } - } + /** + * Gets a country id based on population. + * This method is assumed to be called in an ascending order of user ID. + */ + public int getLocation(int userId) { + if (userId >= locationDistribution.get(curLocationIdx)) { + curLocationIdx++; + } + return countries.get(curLocationIdx); } - public void orderByZ(){ - sortLocation = new LocationZorder[vecCountry.size()]; - ZOrder zorder = new ZOrder(8); + /** + * Sorts countries by its z-order value. 
+ */ + private void orderByZ() { + ZOrder zorder = new ZOrder(8); + sortLocation = new LocationZorder[countries.size()]; - for (int i = 0; i < vecCountry.size(); i++){ - Location loc = vecCountry.get(i); + for (int i = 0; i < countries.size(); i++) { + Location loc = locations.get(countries.get(i)); int zvalue = zorder.getZValue(((int)Math.round(loc.getLongt()) + 180)/2, ((int)Math.round(loc.getLatt()) + 180)/2); - sortLocation[i] = new LocationZorder(loc.getId(),zvalue); + sortLocation[i] = new LocationZorder(loc.getId(), zvalue); } Arrays.sort(sortLocation); + System.out.println("Sorted countries according to their z-value"); - System.out.println("Sorted location according to their z-value "); - - for (int i = 0; i < sortLocation.length; i ++){ - //sortLocation[i].print(); - //System.out.println(sortLocation[i].id + " " + vecLocations.get(sortLocation[i].id).getName() + " " + sortLocation[i].zvalue); - vecCountry.get(sortLocation[i].id).setzId(i); + for (int i = 0; i < sortLocation.length; i++) { + locations.get(sortLocation[i].id).setzId(i); } } - public int getZorderID(int _locationId){ - return vecCountry.get(_locationId).getzId(); - } - public int getLocationIdFromZOrder(int _zOrderId){ - return sortLocation[_zOrderId].id; + /** + * Gets the z-order ID from the given country. + */ + public int getZorderID(int locationId) { + return locations.get(locationId).getzId(); } - class LocationZorder implements Comparable{ - int id; - int zvalue; - public LocationZorder(int _id, int _zvalue){ - this.id = _id; - this.zvalue = _zvalue; - } - public int compareTo(Object obj) - { - LocationZorder tmp = (LocationZorder)obj; - if(this.zvalue < tmp.zvalue) - { - return -1; - } - else if(this.zvalue > tmp.zvalue) - { - return 1; - } - return 0; - } - public void print(){ - System.out.println(id + " " + zvalue); - } + /** + * Gets country id from the given z-order ID. 
+ */ + public int getLocationIdFromZOrder(int zOrderId) { + return sortLocation[zOrderId].id; } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/NamesDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/NamesDictionary.java index 313c41d9b..a5d40ddf1 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/NamesDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/NamesDictionary.java @@ -37,20 +37,10 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; import java.util.Random; -import java.util.Set; -import java.util.TreeSet; import java.util.Vector; import umontreal.iro.lecuyer.probdist.GeometricDist; @@ -58,181 +48,130 @@ public class NamesDictionary { /** - * Geometric probability used in + * Geometric probability used */ private static final double GEOMETRIC_RATIO = 0.2; - //RandomAccessFile dictionary; - BufferedReader surnameDictionary; - BufferedReader givennameDictionary; + private static final int topN = 30; - //String dicFileName; - String surdicFileName; - String givendicFileName; + String surnameFile; + String givennameFile; - HashMap locationNames; + LocationDictionary locationDic; - Vector> surNamesByLocations; - Vector>> givenNamesByLocationsMale; // Year / Location / Names - Vector>> givenNamesByLocationsFemale; + HashMap> surNamesByLocations; + Vector>> givenNamesByLocationsMale; // Year / Location / Names + Vector>> givenNamesByLocationsFemale; + Random rand; + Random randUniform; GeometricDist geoDist; - Random rand; - Random randUniform; - // Store the statistic for testdriver - 
int[][] countBySurNames; - int[][] countByGivenNames; - - final int topN = 30; - - public NamesDictionary(String _surdicFileName, String _givendicFileName, - HashMap _locationNames, long seedRandom){ - this.locationNames = _locationNames; - //this.dicFileName = _dicFileName; - this.surdicFileName = _surdicFileName; - this.givendicFileName = _givendicFileName; - this.rand = new Random(seedRandom); - this.randUniform = new Random(seedRandom); + public NamesDictionary(String surnameFile, String givennameFile, + LocationDictionary locationDic, long seed) { + this.locationDic = locationDic; + this.surnameFile = surnameFile; + this.givennameFile = givennameFile; + + rand = new Random(seed); + randUniform = new Random(seed); geoDist = new GeometricDist(GEOMETRIC_RATIO); } - public void init(){ - try { - //dictionary = new RandomAccessFile(dicFileName, "r"); - surnameDictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(surdicFileName), "UTF-8")); - givennameDictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(givendicFileName), "UTF-8")); - - //System.out.println("Extracting names into a dictionary "); - - surNamesByLocations = new Vector>(locationNames.size()); + + public void init() { + surNamesByLocations = new HashMap>(); + for (Integer id : locationDic.getCountries()) { + surNamesByLocations.put(id, new Vector()); + } - //assume that there is only 2 periods of birthyears - int birthYearPeriod = 2; - givenNamesByLocationsMale = new Vector>>(birthYearPeriod); - givenNamesByLocationsFemale = new Vector>>(birthYearPeriod); - for (int i = 0; i < birthYearPeriod; i++){ - givenNamesByLocationsMale.add(new Vector>(locationNames.size())); - givenNamesByLocationsFemale.add(new Vector>(locationNames.size())); - for (int j = 0; j < locationNames.size(); j++){ - givenNamesByLocationsMale.lastElement().add(new Vector()); - givenNamesByLocationsFemale.lastElement().add(new Vector()); - } - } - - for (int i = 0; i < 
locationNames.size(); i++){ - surNamesByLocations.add(new Vector()); - } - - extractSurNames(); - - surnameDictionary.close(); - - extractGivenNames(); - - givennameDictionary.close(); - - //System.out.println("Sort popular names in Germany"); - //getFrequency(89); - //getFrequency(69); - //System.exit(-1); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + //assume that there is only 2 periods of birthyears + int birthYearPeriod = 2; + givenNamesByLocationsMale = new Vector>>(birthYearPeriod); + givenNamesByLocationsFemale = new Vector>>(birthYearPeriod); + for (int i = 0; i < birthYearPeriod; i++){ + givenNamesByLocationsMale.add(new HashMap>()); + givenNamesByLocationsFemale.add(new HashMap>()); + for (Integer id : locationDic.getCountries()) { + givenNamesByLocationsMale.lastElement().put(id, new Vector()); + givenNamesByLocationsFemale.lastElement().put(id, new Vector()); + } + } + + extractSurNames(); + extractGivenNames(); } - public void extractSurNames(){ - System.out.println("Extract surnames by locations ..."); - String line; - String locationName; - String surName; - String lastLocationName = ""; - int curLocationId = -1; - int totalSurNames = 0; + + public void extractSurNames() { try { - while ((line = surnameDictionary.readLine()) != null){ + BufferedReader surnameDictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(surnameFile), "UTF-8")); + + String line; + int curLocationId = -1; + int totalSurNames = 0; + String lastLocationName = ""; + while ((line = surnameDictionary.readLine()) != null) { String infos[] = line.split(","); - locationName = infos[1]; - - if (locationName.compareTo(lastLocationName) != 0){ // New location - - if (locationNames.containsKey(locationName)){ // Check whether it exists - curLocationId = locationNames.get(locationName); - surName = infos[2].trim(); + String locationName = infos[1]; + if (locationName.compareTo(lastLocationName) != 0) { // 
New location + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION) { // Check whether it exists + curLocationId = locationDic.getCountryId(locationName); + String surName = infos[2].trim(); surNamesByLocations.get(curLocationId).add(surName); totalSurNames++; } - } - else{ - surName = infos[2].trim(); + } else { + String surName = infos[2].trim(); surNamesByLocations.get(curLocationId).add(surName); totalSurNames++; } - } - + surnameDictionary.close(); System.out.println("Done ... " + totalSurNames + " surnames were extracted "); - - // For statictic of the testdriver - //countBySurNames = new int[locationNames.size()][totalNumNames]; - //countByGivenNames = new int[locationNames.size()][totalNumNames]; - } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } - public void extractGivenNames(){ - System.out.println("Extract given by locations ..."); - String line; - String locationName; - String givenName; - String lastLocationName = ""; - int curLocationId = -1; - int totalGivenNames = 0; - int gender; - int birthYearPeriod; + public void extractGivenNames() { try { + BufferedReader givennameDictionary = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(givennameFile), "UTF-8")); + + String line; + int curLocationId = -1; + int totalGivenNames = 0; + String lastLocationName = ""; while ((line = givennameDictionary.readLine()) != null){ String infos[] = line.split(" "); - locationName = infos[0]; - gender = Integer.parseInt(infos[2]); - birthYearPeriod = Integer.parseInt(infos[3]); + String locationName = infos[0]; + int gender = Integer.parseInt(infos[2]); + int birthYearPeriod = Integer.parseInt(infos[3]); - if (locationName.compareTo(lastLocationName) != 0){ // New location - - if (locationNames.containsKey(locationName)){ // Check whether it exists - curLocationId = locationNames.get(locationName); - givenName = infos[1].trim(); - if (gender == 0) + if 
(locationName.compareTo(lastLocationName) != 0) { // New location + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION){ // Check whether it exists + curLocationId = locationDic.getCountryId(locationName); + String givenName = infos[1].trim(); + if (gender == 0) { givenNamesByLocationsMale.get(birthYearPeriod).get(curLocationId).add(givenName); - else + } else { givenNamesByLocationsFemale.get(birthYearPeriod).get(curLocationId).add(givenName); - + } totalGivenNames++; } - } - else{ - givenName = infos[1].trim(); - if (gender == 0) + } else { + String givenName = infos[1].trim(); + if (gender == 0) { givenNamesByLocationsMale.get(birthYearPeriod).get(curLocationId).add(givenName); - else + } else { givenNamesByLocationsFemale.get(birthYearPeriod).get(curLocationId).add(givenName); - + } totalGivenNames++; } - } - + givennameDictionary.close(); System.out.println("Done ... " + totalGivenNames + " given names were extracted "); - - // For statictic of the testdriver - //countBySurNames = new int[locationNames.size()][totalNumNames]; - //countByGivenNames = new int[locationNames.size()][totalNumNames]; - } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } @@ -244,164 +183,46 @@ public void extractGivenNames(){ * Else, from 0 to (limitRank - 1) will be distributed according to * geometric distribution, out of this scope will be distribution */ - - public int getGeoDistRandomIdx(int numOfNames){ - int limitRank = topN; + private int getGeoDistRandomIdx(int numNames){ int nameIdx = -1; double prob = rand.nextDouble(); - int rank = geoDist.inverseFInt(prob); - if (rank < limitRank) - if (numOfNames > rank) - nameIdx = rank; - else - nameIdx = randUniform.nextInt(numOfNames); - else - if (numOfNames > rank) - nameIdx = limitRank + randUniform.nextInt(numOfNames - limitRank); - else - nameIdx = randUniform.nextInt(numOfNames); - - return nameIdx; - } - public String getRandomSurName(int locationId){ - - 
String surName = ""; - int surNameIdx = getGeoDistRandomIdx(surNamesByLocations.get(locationId).size()); - surName = surNamesByLocations.get(locationId).get(surNameIdx); - - // For statistic of the test driver - //countBySurNames[locationId][randomSurNameIdx]++; - - return surName; - } - public String getRandomGivenName(int locationId, boolean isMale, int birthYear){ - String givenName = ""; - int givenNameIdx; - int period = -1; - if (birthYear < 1985) - period = 0; - else - period = 1; - - // Note that, only vector of names for the first period - // contains list of names not in topN - if (isMale){ - //givenNameIdx = getGeoDistRandomIdx(givenNamesByLocationsMale.get(period).get(locationId).size()); - givenNameIdx = getGeoDistRandomIdx(givenNamesByLocationsMale.get(0).get(locationId).size()); - if (givenNameIdx >= topN){ - givenName = givenNamesByLocationsMale.get(0).get(locationId).get(givenNameIdx); - } - else{ - givenName = givenNamesByLocationsMale.get(period).get(locationId).get(givenNameIdx); - } - } - else{ - givenNameIdx = getGeoDistRandomIdx(givenNamesByLocationsFemale.get(0).get(locationId).size()); - if (givenNameIdx >= topN){ - givenName = givenNamesByLocationsFemale.get(0).get(locationId).get(givenNameIdx); + if (rank < topN) { + if (numNames > rank) { + nameIdx = rank; + } else { + nameIdx = randUniform.nextInt(numNames); } - else{ - givenName = givenNamesByLocationsFemale.get(period).get(locationId).get(givenNameIdx); + } else { + if (numNames > rank) { + nameIdx = topN + randUniform.nextInt(numNames - topN); + } else { + nameIdx = randUniform.nextInt(numNames); } } - - // For statistic of the test driver - //countByGivenNames[locationId][randomGivenNameIdx]++; - - return givenName; - } - - public Vector> getSurNamesByLocations() { - return surNamesByLocations; - } - public void setSurNamesByLocations(Vector> surNamesByLocations) { - this.surNamesByLocations = surNamesByLocations; - } - - public int[][] getCountBySurNames() { - return 
countBySurNames; - } - public void setCountBySurNames(int[][] countBySurNames) { - this.countBySurNames = countBySurNames; - } - public int[][] getCountByGivenNames() { - return countByGivenNames; - } - public void setCountByGivenNames(int[][] countByGivenNames) { - this.countByGivenNames = countByGivenNames; + return nameIdx; } - - /* - * The remaining part is for getting frequency of a name and do sorting - * according to names' frequencies - * DO NOT NEED NOW - */ + public String getRandomSurname(int locationId) { + int surNameIdx = getGeoDistRandomIdx(surNamesByLocations.get(locationId).size()); + return surNamesByLocations.get(locationId).get(surNameIdx); + } - public void getFrequency(int index){ - // Sort - - Vector names = surNamesByLocations.get(index); - - Vector nameFrequency = new Vector(); - - Collections.sort(names); - String preName = ""; - int count =0; - int totalCount = 0; - - for (int i=0; i < names.size(); i++){ - - if (names.get(i).compareTo(preName) != 0){ - //System.out.println(" " + preName + " : " + count); - nameFrequency.add(new NameFreq(preName, count)); - preName = names.get(i); - count = 0; - } - count++; - totalCount++; - } - - NameFreq[] sortNameFreq = new NameFreq[nameFrequency.size()]; - for (int i = 0; i < nameFrequency.size(); i ++){ - sortNameFreq[i] = new NameFreq(nameFrequency.get(i).name, nameFrequency.get(i).freq); - } + public String getRandomGivenName(int locationId, boolean isMale, int birthYear){ + String name = ""; + int period = (birthYear < 1985) ? 0 : 1; + Vector>> target = (isMale) ? 
givenNamesByLocationsMale : givenNamesByLocationsFemale; - System.out.println("Number of names " + sortNameFreq.length); - Arrays.sort(sortNameFreq); - for (int i = 0; i < sortNameFreq.length; i ++){ - sortNameFreq[i].printPercent(totalCount); + // Note that, only vector of names for the first period contains list of names not in topN + int nameId = getGeoDistRandomIdx(target.get(0).get(locationId).size()); + if (nameId >= topN) { + name = target.get(0).get(locationId).get(nameId); + } else { + name = target.get(period).get(locationId).get(nameId); } - } - class NameFreq implements Comparable{ - String name; - int freq; - public NameFreq(String _name, int _freq){ - this.name = _name; - this.freq = _freq; - } - public int compareTo(Object obj) - { - NameFreq tmp = (NameFreq)obj; - if(this.freq < tmp.freq) - { - return -1; - } - else if(this.freq > tmp.freq) - { - return 1; - } - return 0; - } - public void print(){ - System.out.println(name + " " + freq); - } - public void printPercent(int total){ - System.out.println(name + " " + (double)100*freq/total); - } + return name; } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java index 20fd10462..ff724f323 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java @@ -37,169 +37,127 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Random; import java.util.Vector; - public class OrganizationsDictionary { - BufferedReader dicAllInstitutes; - String dicFileName; + + private static final String SEPARATOR = " "; + + String 
dicFileName; - HashMap locationNames; HashMap organizationToLocation; + HashMap> organizationsByLocations; - Vector> organizationsByLocations; - Random rand; - - Random randUnRelatedOrganization; - double probUnCorrelatedOrganization; - Random randUnRelatedLocation; - - // For top institutes - Random randTopUniv; - double probTopUniv; + double probTopUniv; + double probUnCorrelatedOrganization; + Random rand; + Random randTopUniv; + Random randUnRelatedOrganization; + Random randUnRelatedLocation; LocationDictionary locationDic; - public OrganizationsDictionary(String _dicFileName, LocationDictionary _locationDic, - long seedRandom, double _probUnCorrelatedOrganization, - long _seedTopUni, double _probTopUni){ - this.locationNames = _locationDic.getLocationNameMapping(); - this.organizationToLocation = new HashMap(); - this.dicFileName = _dicFileName; - this.rand = new Random(seedRandom); - this.randUnRelatedLocation = new Random(seedRandom); - this.randUnRelatedOrganization = new Random(seedRandom); - this.probUnCorrelatedOrganization = _probUnCorrelatedOrganization; - this.randTopUniv = new Random(_seedTopUni); - this.probTopUniv = _probTopUni; - this.locationDic = _locationDic; + public OrganizationsDictionary(String dicFileName, LocationDictionary locationDic, + long seedRandom, double probUnCorrelatedOrganization, + long seedTopUni, double probTopUni){ + + this.dicFileName = dicFileName; + this.probTopUniv = probTopUni; + this.locationDic = locationDic; + this.probUnCorrelatedOrganization = probUnCorrelatedOrganization; + + rand = new Random(seedRandom); + randTopUniv = new Random(seedTopUni); + randUnRelatedLocation = new Random(seedRandom); + randUnRelatedOrganization = new Random(seedRandom); } + public void init(){ - try { - dicAllInstitutes = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); - - System.out.println("Building dictionary of organizations (by locations)"); - - organizationsByLocations = new 
Vector>(locationNames.size()); - for (int i = 0; i < locationNames.size(); i++){ - organizationsByLocations.add(new Vector()); - } - - extractOrganizationNames(); - - dicAllInstitutes.close(); - - } catch (IOException e) { - e.printStackTrace(); - } + organizationToLocation = new HashMap(); + organizationsByLocations = new HashMap>(); + for (Integer id : locationDic.getCountries()){ + organizationsByLocations.put(id, new Vector()); + } + extractOrganizationNames(); } public HashMap GetOrganizationLocationMap() { return organizationToLocation; } - public void extractOrganizationNames(){ - //System.out.println("Extract organizations by location ..."); - String line; - String locationName; - String organizationName; - String lastLocationName = ""; - int curLocationId = -1; - int totalNumOrganizations = 0; + public void extractOrganizationNames() { try { + BufferedReader dicAllInstitutes = new BufferedReader( + new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); + + String line; + int curLocationId = -1; + int totalNumOrganizations = 0; + String lastLocationName = ""; while ((line = dicAllInstitutes.readLine()) != null){ - String infos[] = line.split(" "); - locationName = infos[0]; - if (locationName.compareTo(lastLocationName) != 0){ - if (locationNames.containsKey(locationName)){ + String data[] = line.split(SEPARATOR); + String locationName = data[0]; + if (locationName.compareTo(lastLocationName) != 0) { + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION) { lastLocationName = locationName; - curLocationId = locationNames.get(locationName); - organizationName = infos[1].trim(); + curLocationId = locationDic.getCountryId(locationName); + String organizationName = data[1].trim(); organizationsByLocations.get(curLocationId).add(organizationName); - Integer cityId = locationDic.getCityId(infos[2]); + Integer cityId = locationDic.getCityId(data[2]); organizationToLocation.put(organizationName, cityId); 
totalNumOrganizations++; } - } - else{ - organizationName = infos[1].trim(); + } else{ + String organizationName = data[1].trim(); organizationsByLocations.get(curLocationId).add(organizationName); - Integer cityId = locationDic.getCityId(infos[2]); + Integer cityId = locationDic.getCityId(data[2]); organizationToLocation.put(organizationName, cityId); totalNumOrganizations++; } - } - + dicAllInstitutes.close(); System.out.println("Done ... " + totalNumOrganizations + " organizations were extracted"); - } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } // 90% of people go to top-10 universities // 10% go to remaining universities - public int getRandomOrganization(int _locationId){ - int bitmask = 0x00FF; - int locationOrganization; - int randomOrganizationIdx; - int locationId = _locationId; + public int getRandomOrganization(int countryId) { + + int locationId = countryId; + double prob = randUnRelatedOrganization.nextDouble(); - // User may not study at a university in her location - if (randUnRelatedOrganization.nextDouble() > probUnCorrelatedOrganization){ - while (organizationsByLocations.get(locationId).size() == 0){ - locationId = randUnRelatedLocation.nextInt(locationNames.size()); - } - - //Select a university in top 10 - if (randTopUniv.nextDouble() < probTopUniv){ - randomOrganizationIdx = rand.nextInt( - Math.min(organizationsByLocations.get(locationId).size(), 10)); - } - //Select a random organization - else{ - randomOrganizationIdx = rand.nextInt(organizationsByLocations.get(locationId).size()); - } - - int zOrderLocation = locationDic.getZorderID(locationId); - - locationOrganization = (zOrderLocation << 24) | (randomOrganizationIdx << 12); - - return locationOrganization; + Vector countries = locationDic.getCountries(); + if (randUnRelatedOrganization.nextDouble() <= probUnCorrelatedOrganization) { + locationId = countries.get(randUnRelatedLocation.nextInt(countries.size())); } - else{ // Randomly select one 
institute out of the location - int uncorrelateLocationIdx = randUnRelatedLocation.nextInt(locationNames.size()); - while (organizationsByLocations.get(uncorrelateLocationIdx).size() == 0){ - uncorrelateLocationIdx = randUnRelatedLocation.nextInt(locationNames.size()); - } - - - randomOrganizationIdx = rand.nextInt(organizationsByLocations.get(uncorrelateLocationIdx).size()); - - int zOrderLocation = locationDic.getZorderID(uncorrelateLocationIdx); - - locationOrganization = (zOrderLocation << 24) | (randomOrganizationIdx << 12); - - return locationOrganization; + + while (organizationsByLocations.get(locationId).size() == 0) { + locationId = countries.get(randUnRelatedLocation.nextInt(countries.size())); + } + + int range = organizationsByLocations.get(locationId).size(); + if (prob > probUnCorrelatedOrganization && randTopUniv.nextDouble() < probTopUniv) { + range = Math.min(organizationsByLocations.get(locationId).size(), 10); } + + int randomOrganizationIdx = rand.nextInt(range); + int zOrderLocation = locationDic.getZorderID(locationId); + int locationOrganization = (zOrderLocation << 24) | (randomOrganizationIdx << 12); + return locationOrganization; } - public String getOrganizationName(int locationOrganization){ - String organization; + + public String getOrganizationName(int locationOrganization) { int zOrderlocationId = locationOrganization >> 24; int organizationId = (locationOrganization >> 12) & 0x0FFF; int locationId = locationDic.getLocationIdFromZOrder(zOrderlocationId); - organization = organizationsByLocations.get(locationId).get(organizationId); - - return organization; + return organizationsByLocations.get(locationId).get(organizationId); } - } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/PopularPlacesDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/PopularPlacesDictionary.java index b2a6178fb..95352d67e 100644 --- 
a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/PopularPlacesDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/PopularPlacesDictionary.java @@ -37,12 +37,8 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Random; import java.util.Vector; @@ -51,196 +47,84 @@ public class PopularPlacesDictionary { + + String dicFileName; - BufferedReader dicPopularPlace; - String dicFileName; - - HashMap locationNames; - - Vector> popularPlacesByLocations; //Popular places in each country - Random randPopularPlaceId; + LocationDictionary locationDic; + HashMap> popularPlacesByLocations; - int numLocations; + Random rand; - public PopularPlacesDictionary(String _dicFileName, HashMap _locationNames, + public PopularPlacesDictionary(String dicFileName, LocationDictionary locationDic, long seedRandom){ - this.dicFileName = _dicFileName; - this.locationNames = _locationNames; - this.randPopularPlaceId = new Random(seedRandom); + this.dicFileName = dicFileName; + this.locationDic = locationDic; + + rand = new Random(seedRandom); } + public void init(){ - try { - dicPopularPlace = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); - - System.out.println("Building dictionary of popular places (by countries)"); - - numLocations = locationNames.size(); - - popularPlacesByLocations = new Vector>(numLocations); - for (int i = 0; i < locationNames.size(); i++){ - popularPlacesByLocations.add(new Vector()); - } - - //removePopularPlacesDuplication(); // Run only one time - - extractPopularPlaces(); - - //checkCompleteness(); - - dicPopularPlace.close(); - - } catch (IOException e) { - // TODO Auto-generated catch block - 
e.printStackTrace(); - } + + popularPlacesByLocations = new HashMap>(); + for (Integer id : locationDic.getCountries()) { + popularPlacesByLocations.put(id, new Vector()); + } + + extractPopularPlaces(); } + public void extractPopularPlaces(){ - //System.out.println("Extract organizations by location ..."); String line; - String locationName; - String popularPlaceName; + String locationName; String lastLocationName = ""; int curLocationId = -1; int totalNumPopularPlaces = 0; - String label; - double latt; - double longt; + String label; try { - while ((line = dicPopularPlace.readLine()) != null){ - - String infos[] = line.split(" "); //country Name Label Lat Long + BufferedReader dicPopularPlace = new BufferedReader( + new InputStreamReader(getClass().getResourceAsStream(dicFileName), "UTF-8")); + + while ((line = dicPopularPlace.readLine()) != null) { + double latt; + double longt; + String infos[] = line.split(" "); locationName = infos[0]; - //System.out.println("Line in names = " + line); - if (locationName.compareTo(lastLocationName) != 0){ // New location - if (locationNames.containsKey(locationName)){ // Check whether it exists + if (locationName.compareTo(lastLocationName) != 0) { + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION) { lastLocationName = locationName; - curLocationId = locationNames.get(locationName); - popularPlaceName = infos[1]; + curLocationId = locationDic.getCountryId(locationName); label = infos[2]; latt = Double.parseDouble(infos[3]); longt = Double.parseDouble(infos[4]); popularPlacesByLocations.get(curLocationId).add(new PopularPlace(label, latt, longt)); - totalNumPopularPlaces++; } - - } - else{ - popularPlaceName = infos[1]; + } else { label = infos[2]; latt = Double.parseDouble(infos[3]); longt = Double.parseDouble(infos[4]); popularPlacesByLocations.get(curLocationId).add(new PopularPlace(label, latt, longt)); totalNumPopularPlaces++; } - } - + dicPopularPlace.close(); System.out.println("Done 
... " + totalNumPopularPlaces + " popular places were extracted"); - } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } - // Check whether there is any location having no institute - public void checkCompleteness(){ - for (int i = 0; i < locationNames.size(); i++){ - if (popularPlacesByLocations.get(i).size() == 0){ - System.out.println("Location " + i + " has no popular place!"); - } + public short getPopularPlace(int locationidx) { + if (popularPlacesByLocations.get(locationidx).size() == 0) { + return -1; } - System.exit(-1); - } - - public short getPopularPlace(int locationidx){ - if (popularPlacesByLocations.get(locationidx).size() == 0) return -1; - - return (short) randPopularPlaceId.nextInt(popularPlacesByLocations.get(locationidx).size()); - } - - - public int getPopularPlaceNoCheck(int locationIdx){ - return randPopularPlaceId.nextInt(popularPlacesByLocations.get(locationIdx).size()); + return (short) rand.nextInt(popularPlacesByLocations.get(locationidx).size()); } public PopularPlace getPopularPlace(int locationIdx, int placeId){ return popularPlacesByLocations.get(locationIdx).get(placeId); } - public int getNumPopularPlaces(int locationIdx){ - return popularPlacesByLocations.get(locationIdx).size(); - } - - public int getNumLocations() { - return numLocations; - } - public void setNumLocations(int numLocations) { - this.numLocations = numLocations; - } - - public void removePopularPlacesDuplication(){ - //System.out.println("Extract organizations by location ..."); - String dicPopularPlacesOriginal = "/export/scratch1/duc/work/virtuosoServer/virtuosoOPS/var/lib/virtuoso/db/popularPlacesByCountry.txt.original"; - String dicPopularPlaces = "/export/scratch1/duc/work/virtuosoServer/virtuosoOPS/var/lib/virtuoso/db/popularPlacesByCountry.txt"; - - String line; - String locationName; - String popularPlaceName; - String lastLocationName = ""; - int curLocationId = -1; - int totalNumPopularPlaces = 0; - String 
lastAddedPopularName = ""; - - - try { - FileOutputStream dicPopularPlaceFile; - dicPopularPlaceFile = new FileOutputStream(dicPopularPlaces); - OutputStreamWriter writer; - writer = new OutputStreamWriter(dicPopularPlaceFile); - - BufferedReader dicPopularPlace = new BufferedReader(new InputStreamReader(new FileInputStream(dicPopularPlacesOriginal), "UTF-8")); - - while ((line = dicPopularPlace.readLine()) != null){ - - String infos[] = line.split(" "); //country Name Label Lat Long - locationName = infos[0]; - //System.out.println("Line in names = " + line); - if (locationName.compareTo(lastLocationName) != 0){ // New location - if (locationNames.containsKey(locationName)){ // Check whether it exists - lastLocationName = locationName; - curLocationId = locationNames.get(locationName); - popularPlaceName = infos[1].trim(); - if (popularPlaceName.compareTo(lastAddedPopularName) != 0){ - writer.write(line + "\n"); - lastAddedPopularName = popularPlaceName; - totalNumPopularPlaces++; - } - } - - } - else{ - popularPlaceName = infos[1].trim(); - if (popularPlaceName.compareTo(lastAddedPopularName) != 0){ - writer.write(line + "\n"); - lastAddedPopularName = popularPlaceName; - totalNumPopularPlaces++; - } - - } - - } - - writer.close(); - - System.out.println("Done ... 
" + totalNumPopularPlaces + " organizations were extracted"); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagDictionary.java index 1b0d9df29..e8046cb7d 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagDictionary.java @@ -37,58 +37,49 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Random; import java.util.Vector; -import ldbc.socialnet.dbgen.objects.Tag; - public class TagDictionary { - private final String SEPARATOR = "\t"; + private static final String SEPARATOR = "\t"; - BufferedReader dictionary; - String dicFileName; - String dicTopic; - String tagClassFile; - String tagHierarchyFile; - Vector> vecTagByCountry; - Vector> vecTagCumDist; - Vector> vecTagId; - - int numCelebrity = 0 ; + int numCelebrity; + double tagCountryCorrProb; + + String dicFileName; + String dicTopic; + String tagClassFile; + String tagHierarchyFile; + + Vector> tagsByCountry; + Vector> tagCummulativeDist; - HashMap className; - HashMap classLabel; + HashMap className; + HashMap classLabel; HashMap classHierarchy; HashMap tagClass; - HashMap tagNames; - HashMap tagDescription; // a.k.a foaf:Names + HashMap tagNames; + HashMap tagDescription; // a.k.a foaf:Names + Random rnd; Random rnd2; - - int totalReferences; //Total number of references to tags - - double tagCountryCorrProb; // The probability to select a tag from its country - // May be 0.5 - - public TagDictionary(String 
dicTopic, String _dicFileName, String tagClassFile, String tagHierarchyFile, - int _numLocations, long seed, double _tagCountryCorrProb){ + int numLocations, long seed, double tagCountryCorrProb) { + this.dicFileName = _dicFileName; this.dicTopic = dicTopic; this.tagClassFile = tagClassFile; this.tagHierarchyFile = tagHierarchyFile; - vecTagCumDist = new Vector>(_numLocations); - vecTagId = new Vector>(_numLocations); + this.tagCountryCorrProb = tagCountryCorrProb; + + tagCummulativeDist = new Vector>(numLocations); + tagsByCountry = new Vector>(numLocations); tagNames = new HashMap(); tagClass = new HashMap(); tagDescription = new HashMap(); @@ -96,19 +87,15 @@ public TagDictionary(String dicTopic, String _dicFileName, String tagClassFile, classLabel = new HashMap(); classHierarchy = new HashMap(); - for (int i = 0; i < _numLocations; i++){ - vecTagCumDist.add(new Vector()); - vecTagId.add(new Vector()); + for (int i = 0; i < numLocations; i++){ + tagCummulativeDist.add(new Vector()); + tagsByCountry.add(new Vector()); } - tagCountryCorrProb = _tagCountryCorrProb; - - rnd = new Random(seed); + rnd = new Random(seed); rnd2 = new Random(seed); - } - - public HashMap getTagsNamesMapping() { - return tagNames; + + numCelebrity = 0; } public String getName(int id) { @@ -138,23 +125,20 @@ public Integer getClassParent(int id) { return classHierarchy.get(id); } - - public void extractTags(){ - String line; - int countryId; - double cumm; - int tagId = 0; + public void initialize() { try { - - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(tagClassFile), "UTF-8")); - while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(SEPARATOR); - Integer classId = Integer.valueOf(infos[0]); - className.put(classId, infos[1]); - classLabel.put(classId, infos[2]); + BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(tagClassFile), "UTF-8")); + + String line; + while ((line = 
dictionary.readLine()) != null){ + String data[] = line.split(SEPARATOR); + Integer classId = Integer.valueOf(data[0]); + className.put(classId, data[1]); + classLabel.put(classId, data[2]); } - - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(tagHierarchyFile), "UTF-8")); + + dictionary.close(); + dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(tagHierarchyFile), "UTF-8")); while ((line = dictionary.readLine()) != null){ String infos[] = line.split(SEPARATOR); Integer classId = Integer.valueOf(infos[0]); @@ -162,74 +146,67 @@ public void extractTags(){ classHierarchy.put(classId, parentId); } - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicTopic), "UTF-8")); + dictionary.close(); + dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(dicTopic), "UTF-8")); while ((line = dictionary.readLine()) != null){ String infos[] = line.split(SEPARATOR); - tagId = Integer.valueOf(infos[0]); + int tagId = Integer.valueOf(infos[0]); Integer classId = Integer.valueOf(infos[1]); tagClass.put(tagId, classId); tagNames.put(tagId, infos[2]); tagDescription.put(tagId, infos[3]); } + dictionary.close(); dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); while ((line = dictionary.readLine()) != null){ String infos[] = line.split(" "); - countryId = Integer.parseInt(infos[0]); - tagId = Integer.parseInt(infos[1]); - cumm = Double.parseDouble(infos[2]); + int countryId = Integer.parseInt(infos[0]); + int tagId = Integer.parseInt(infos[1]); + double cummulative = Double.parseDouble(infos[2]); - vecTagCumDist.get(countryId).add(cumm); - vecTagId.get(countryId).add(tagId); + tagCummulativeDist.get(countryId).add(cummulative); + tagsByCountry.get(countryId).add(tagId); if (tagId + 1 > numCelebrity) { numCelebrity = tagId + 1; } } dictionary.close(); - } catch (IOException e) { 
e.printStackTrace(); } - } public int getaTagByCountry(int _countryId){ int countryId; countryId = _countryId; - if (vecTagId.get(countryId).size() == 0 || rnd.nextDouble() > tagCountryCorrProb){ - //Randomly get from other country. + if (tagsByCountry.get(countryId).size() == 0 || rnd.nextDouble() > tagCountryCorrProb) { do { - countryId = rnd.nextInt(vecTagId.size()); - } while (vecTagId.get(countryId).size() == 0); + countryId = rnd.nextInt(tagsByCountry.size()); + } while (tagsByCountry.get(countryId).size() == 0); } // Doing binary search for finding the tag double randomDis = rnd2.nextDouble(); int lowerBound = 0; - int upperBound = vecTagId.get(countryId).size(); - + int upperBound = tagsByCountry.get(countryId).size(); int curIdx = (upperBound + lowerBound) / 2; - while (upperBound > (lowerBound+1)){ - if (vecTagCumDist.get(countryId).get(curIdx) > randomDis ){ + while (upperBound > (lowerBound+1)) { + if (tagCummulativeDist.get(countryId).get(curIdx) > randomDis ){ upperBound = curIdx; - } - else{ + } else { lowerBound = curIdx; } curIdx = (upperBound + lowerBound) / 2; } - return vecTagId.get(countryId).get(curIdx); + return tagsByCountry.get(countryId).get(curIdx); } public int getNumCelebrity() { return numCelebrity; } - - public void setNumCelebrity(int numCelebrity) { - this.numCelebrity = numCelebrity; - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java index db4c1da71..61d8c0efa 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java @@ -37,100 +37,86 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.InputStreamReader; -import 
java.io.OutputStreamWriter; -import java.io.RandomAccessFile; -import java.util.HashMap; -import java.util.TreeSet; -import java.util.Iterator; -import java.util.Map.Entry; +import java.util.HashSet; import java.util.Vector; import java.util.Random; public class TagMatrix { - BufferedReader dictionary; + private static final String SEPARATOR = " "; + String dicFileName; - Vector < Vector > vecCumulative; - Vector < Vector > vecTopicID; + Vector> vecCumulative; + Vector> vecTopicID; Random rnd; Random rnd2; - public TagMatrix(String _dicFileName, int _numCelebrities, long seed){ - this.dicFileName = _dicFileName; - vecCumulative = new Vector>(_numCelebrities); - vecTopicID = new Vector>(_numCelebrities); + public TagMatrix(String dicFileName, int numCelebrities, long seed){ + + this.dicFileName = dicFileName; - for (int i = 0; i < _numCelebrities; i++){ + vecCumulative = new Vector>(numCelebrities); + vecTopicID = new Vector>(numCelebrities); + for (int i = 0; i < numCelebrities; i++){ vecCumulative.add(new Vector()); vecTopicID.add(new Vector()); } - rnd = new Random(seed); + rnd = new Random(seed); rnd2 = new Random(seed); } public void initMatrix() { try { - dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); + BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(dicFileName), "UTF-8")); String line; - while ((line = dictionary.readLine()) != null){ - String infos[] = line.split(" "); - int celebrityId = Integer.parseInt(infos[0]); - int topicId = Integer.parseInt(infos[1]); - double cumuluative = Double.parseDouble(infos[2]); + while ((line = dictionary.readLine()) != null) { + String data[] = line.split(SEPARATOR); + int celebrityId = Integer.parseInt(data[0]); + int topicId = Integer.parseInt(data[1]); + double cumuluative = Double.parseDouble(data[2]); vecCumulative.get(celebrityId).add(cumuluative); vecTopicID.get(celebrityId).add(topicId); } 
dictionary.close(); - } catch (Exception e) { e.printStackTrace(); } } // Combine the main tag and related tags - - public TreeSet getSetofTags(int _celebrityId, int numTags){ - TreeSet resultTags = new TreeSet(); - resultTags.add(_celebrityId); + public HashSet getSetofTags(int celebrityId, int numTags){ + HashSet resultTags = new HashSet(); + resultTags.add(celebrityId); while (resultTags.size() < numTags) { int tagId; - tagId = _celebrityId; - - if (vecTopicID.get(tagId).size() == 0){ - //Randomly get from other country. - do { - tagId = rnd.nextInt(vecTopicID.size()); - } while (vecTopicID.get(tagId).size() == 0); + tagId = celebrityId; + + while (vecTopicID.get(tagId).size() == 0) { + tagId = rnd.nextInt(vecTopicID.size()); } - // Doing binary search for finding the tag double randomDis = rnd2.nextDouble(); int lowerBound = 0; - int upperBound = vecTopicID.get(tagId).size(); - - int curIdx = (upperBound + lowerBound) / 2; + int upperBound = vecTopicID.get(tagId).size(); + int midPoint = (upperBound + lowerBound) / 2; while (upperBound > (lowerBound+1)){ - if (vecCumulative.get(tagId).get(curIdx) > randomDis ){ - upperBound = curIdx; - } - else{ - lowerBound = curIdx; + if (vecCumulative.get(tagId).get(midPoint) > randomDis ){ + upperBound = midPoint; + } else{ + lowerBound = midPoint; } - curIdx = (upperBound + lowerBound) / 2; + midPoint = (upperBound + lowerBound) / 2; } - resultTags.add(vecTopicID.get(tagId).get(curIdx)); + resultTags.add(vecTopicID.get(tagId).get(midPoint)); } return resultTags; diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java index 962856b34..56acf955c 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java @@ -40,7 +40,7 @@ import 
java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Iterator; import java.util.Random; @@ -58,31 +58,33 @@ public class TagTextDictionary { public static int commentId = -1; + private static final String SEPARATOR = " "; + + String dicFileName; + DateGenerator dateGen; + + TagDictionary tagDic; HashMap tagText; - String dicFileName; - - DateGenerator dateGen; - HashMap tagIdToName; Random rand; - Random randReduceText; - Random randTextSize; Random randReplyTo; + Random randTextSize; + Random randReduceText; int minSizeOfText; int maxSizeOfText; + int reduceTextSize; int minSizeOfComment; int maxSizeOfComment; - int reduceTextSize; double reduceTextRatio; - public TagTextDictionary(String dicFileName, DateGenerator dateGen, HashMap tagIdToName, + public TagTextDictionary(String dicFileName, DateGenerator dateGen, TagDictionary tagDic, int minSizeOfText, int maxSizeOfText, int minSizeOfComment, int maxSizeOfComment, double reduceTextRatio, long seed, long seedTextSize){ this.dicFileName = dicFileName; this.tagText = new HashMap(); this.dateGen = dateGen; - this.tagIdToName = tagIdToName; + this.tagDic = tagDic; rand = new Random(seed); randReduceText = new Random(seed); randReplyTo = new Random(seed); @@ -100,9 +102,9 @@ public void initialize() { BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(dicFileName), "UTF-8")); String line; while ((line = dictionary.readLine()) != null){ - String[] splitted = line.split(" "); - Integer id = Integer.valueOf(splitted[0]); - tagText.put(id, splitted[1]); + String[] data = line.split(SEPARATOR); + Integer id = Integer.valueOf(data[0]); + tagText.put(id, data[1]); } dictionary.close(); } catch (Exception e) { @@ -114,7 +116,7 @@ public String getTagText(int id) { return tagText.get(id); } - public String getRandomText(TreeSet tags) { + public String getRandomText(HashSet tags) { 
int textSize; int startingPos; @@ -140,7 +142,7 @@ public String getRandomText(TreeSet tags) { startingPos = randTextSize.nextInt(content.length() - textSize); String finalString = content.substring(startingPos, startingPos + textSize - 1); - String tagName = tagIdToName.get(tag).replace("_", " "); + String tagName = tagDic.getName(tag).replace("_", " "); tagName = tagName.replace("\"", "\\\""); String prefix = "About " +tagName+ ", "; @@ -213,10 +215,10 @@ public Post createPost(ReducedUserProfile user, int maxNumberOfLikes, post.setCreatedDate(dateGen.randomPostCreatedDate(user)); post.setForumId(user.getAccountId() * 2); post.setUserAgent(userAgentDic.getUserAgentName(user.isHaveSmartPhone(), user.getAgentIdx())); - ipAddDic.setPostIPAdress(user.isFrequentChange(), user.getIpAddress(), post); + post.setIpAddress(ipAddDic.getIP(user.getIpAddress(), user.isFrequentChange(), post.getCreatedDate())); post.setBrowserIdx(browserDic.getPostBrowserId(user.getBrowserIdx())); - TreeSet tags = new TreeSet(); + HashSet tags = new HashSet(); Iterator it = user.getSetOfTags().iterator(); while (it.hasNext()) { Integer value = it.next(); @@ -236,7 +238,7 @@ public Post createPost(ReducedUserProfile user, int maxNumberOfLikes, post.setInterestedUserAccs(likes); long[] likeTimestamp = new long[likes.length]; for (int i = 0; i < likes.length; i++) { - likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.sevenDayInMillis+post.getCreatedDate()); + likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.SEVEN_DAYS+post.getCreatedDate()); } post.setInterestedUserAccsTimestamp(likeTimestamp); @@ -258,10 +260,10 @@ public Post createPost(Group group, int maxNumberOfLikes, post.setCreatedDate(dateGen.randomGroupPostCreatedDate(memberShip.getJoinDate())); post.setForumId(group.getForumWallId()); post.setUserAgent(userAgentDic.getUserAgentName(memberShip.isHaveSmartPhone(), memberShip.getAgentIdx())); - ipAddDic.setPostIPAdress(memberShip.isFrequentChange(), memberShip.getIP(), 
post); + post.setIpAddress(ipAddDic.getIP(memberShip.getIP(), memberShip.isFrequentChange(), post.getCreatedDate())); post.setBrowserIdx(browserDic.getPostBrowserId(memberShip.getBrowserIdx())); - TreeSet tags = new TreeSet(); + HashSet tags = new HashSet(); for (int i = 0; i < group.getTags().length; i++) { tags.add(group.getTags()[i]); } @@ -274,7 +276,7 @@ public Post createPost(Group group, int maxNumberOfLikes, post.setInterestedUserAccs(likes); long[] likeTimestamp = new long[likes.length]; for (int i = 0; i < likes.length; i++) { - likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.sevenDayInMillis+post.getCreatedDate()); + likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.SEVEN_DAYS+post.getCreatedDate()); } post.setInterestedUserAccsTimestamp(likeTimestamp); @@ -332,15 +334,14 @@ public Comment createComment(Post post, ReducedUserProfile user, comment.setPostId(post.getPostId()); comment.setReply_of(getReplyToId(startCommentId, lastCommentId)); comment.setForumId(post.getForumId()); + comment.setCreateDate(dateGen.powerlawCommDateDay(lastCommentCreatedDate)); comment.setUserAgent(userAgentDic.getUserAgentName(friend.isHaveSmartPhone(), friend.getAgentIdx())); - ipAddDic.setCommentIPAdress(friend.isFrequentChange(), friend.getSourceIp(), comment); + comment.setIpAddress(ipAddDic.getIP(friend.getSourceIp(), friend.isFrequentChange(), comment.getCreateDate())); comment.setBrowserIdx(browserDic.getPostBrowserId(friend.getBrowserIdx())); comment.setContent(getRandomText(post.getTags())); - comment.setCreateDate(dateGen.powerlawCommDateDay(lastCommentCreatedDate)); - return comment; } @@ -382,7 +383,7 @@ public Comment createComment(Post post, Group group, comment.setForumId(post.getForumId()); comment.setUserAgent(userAgentDic.getUserAgentName(memberShip.isHaveSmartPhone(), memberShip.getAgentIdx())); - ipAddDic.setCommentIPAdress(memberShip.isFrequentChange(), memberShip.getIP(), comment); + 
comment.setIpAddress(ipAddDic.getIP(memberShip.getIP(), memberShip.isFrequentChange(), comment.getCreateDate())); comment.setBrowserIdx(browserDic.getPostBrowserId(memberShip.getBrowserIdx())); comment.setContent(getRandomText(post.getTags())); diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/UserAgentDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/UserAgentDictionary.java index a1b9c0ca4..c090e4690 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/UserAgentDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/UserAgentDictionary.java @@ -37,86 +37,51 @@ package ldbc.socialnet.dbgen.dictionary; import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.util.Random; import java.util.Vector; -import ldbc.socialnet.dbgen.objects.Comment; -import ldbc.socialnet.dbgen.objects.Friend; -import ldbc.socialnet.dbgen.objects.GroupMemberShip; -import ldbc.socialnet.dbgen.objects.Photo; -import ldbc.socialnet.dbgen.objects.Post; -import ldbc.socialnet.dbgen.objects.ReducedUserProfile; -import ldbc.socialnet.dbgen.objects.UserProfile; - public class UserAgentDictionary { - String agentFileName = ""; - Vector vUserAgents; - BufferedReader agentFile; - Random randGen; - double probSentFromAgent; - Random randSentFrom; + + String fileName; + + Vector userAgents; + double probSentFromAgent; + + Random randGen; + Random randSentFrom; - public UserAgentDictionary(String _agentFileName, long seed, long seed2, double _probSentFromAgent){ - this.agentFileName = _agentFileName; + public UserAgentDictionary(String fileName, long seed, long seed2, double probSentFromAgent){ + this.fileName = fileName; + this.probSentFromAgent = probSentFromAgent; + randGen = new Random(seed); - randSentFrom = new Random(seed2); - this.probSentFromAgent = 
_probSentFromAgent; + randSentFrom = new Random(seed2); } public void init(){ - try { - vUserAgents = new Vector(); - agentFile = new BufferedReader(new InputStreamReader(getClass( ).getResourceAsStream(agentFileName), "UTF-8")); - - extractAgents(); + userAgents = new Vector(); + BufferedReader agentFile = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(fileName), "UTF-8")); + String line; + while ((line = agentFile.readLine()) != null) { + userAgents.add(line.trim()); + } agentFile.close(); - - System.out.println("Done ... " + vUserAgents.size() + " agents have been extracted "); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - public void extractAgents(){ - String line; - - try { - while ((line = agentFile.readLine()) != null){ - vUserAgents.add(line.trim()); - } - + System.out.println("Done ... " + userAgents.size() + " agents have been extracted "); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } public String getUserAgentName(boolean hasSmathPhone, byte agentId){ - // Sent from user's agent - if (hasSmathPhone && (randSentFrom.nextDouble() > probSentFromAgent)){ - return getUserAgent(agentId); - } - return ""; - } - - public String getUniformRandomAgent(){ - int randIdx = randGen.nextInt(vUserAgents.size()); - - return vUserAgents.get(randIdx); + return (hasSmathPhone && (randSentFrom.nextDouble() > probSentFromAgent)) ? 
userAgents.get(agentId) : ""; } public byte getRandomUserAgentIdx(){ - return (byte)randGen.nextInt(vUserAgents.size()); + return (byte)randGen.nextInt(userAgents.size()); } - public String getUserAgent(int idx){ - return vUserAgents.get(idx); - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/DateGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/DateGenerator.java index aa71e9fa2..86e861225 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/DateGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/DateGenerator.java @@ -47,41 +47,27 @@ public class DateGenerator { - public static long oneDayInMillis = 24*60*60*1000; - public static long thirtyDayInMillis = (long)24*60*60*1000*30; - public static long sevenDayInMillis = (long)24*60*60*1000*7; - public static long thirtyYear = (long)24*60*60*1000*365*30; - public static long tenYear = (long)24*60*60*1000*365*10; - public static long oneYear = (long)24*60*60*1000*365; - public static long twoYear = (long)24*60*60*1000*365*2; - - - - private long from, to; - private long fromBirthDay, toBirthDay; + public static long ONE_DAY = 24L * 60L * 60L * 1000L; + public static long SEVEN_DAYS = 7L * ONE_DAY; + public static long THIRTY_DAYS = 30L * ONE_DAY; + public static long ONE_YEAR = 365L * ONE_DAY; + public static long TWO_YEARS = 2L * ONE_YEAR; + public static long TEN_YEARS = 10L * ONE_YEAR; + public static long THIRTY_YEARS = 30L * ONE_YEAR; + + private long from; + private long to; + private long fromBirthDay; + private long toBirthDay; GregorianCalendar birthCalendar; private Random ranGen; private Random ranClassYear; private Random ranWorkingYear; - private Random thirtyDayRanGen; private Random sevenDayRanGen; private PowerDistGenerator disGen; - - /* - public DateGenerator(GregorianCalendar from, GregorianCalendar to, Long seed) - { - this.from = from.getTimeInMillis(); - this.to = 
to.getTimeInMillis(); - ranGen = new Random(seed); - ranClassYear = new Random(seed); - ranWorkingYear = new Random(seed); - - } - */ - // This constructor is for the case of friendship's created date generator public DateGenerator(GregorianCalendar from, GregorianCalendar to, Long seed, Long seedForThirtyday, double alphaForPowerlaw) @@ -103,44 +89,6 @@ public DateGenerator(GregorianCalendar from, GregorianCalendar to, this.birthCalendar = new GregorianCalendar(); } - /* - * Date generator with range from - (from+toSpanInDays) - */ - /* - public DateGenerator(GregorianCalendar from, Integer toSpanInDays, Long seed) - { - this.from = from.getTimeInMillis(); - this.to = this.from + oneDayInMillis*toSpanInDays; - - ranGen = new Random(seed); - ranClassYear = new Random(seed); - ranWorkingYear = new Random(seed); - } - */ - - /* - * Date generator with range (to-fromSpanInDays) - to - */ - /* - public DateGenerator(Integer fromSpanInDays, GregorianCalendar to, Long seed) - { - this.to = to.getTimeInMillis(); - this.from = this.to - oneDayInMillis*fromSpanInDays; - ranGen = new Random(seed); - ranClassYear = new Random(seed); - ranWorkingYear = new Random(seed); - } - - public DateGenerator(Long seed) - { - this.from = 0l; - this.to = 0l; - ranGen = new Random(seed); - ranClassYear = new Random(seed); - ranWorkingYear = new Random(seed); - } - */ - /* * Date between from and to */ @@ -284,7 +232,7 @@ public Long randomDateInMillis(Long from, Long to) } public Long randomThirtyDaysSpan(Long from){ - long randomSpanMilis = (long) (thirtyDayRanGen.nextDouble()* (thirtyDayInMillis)); + long randomSpanMilis = (long) (thirtyDayRanGen.nextDouble()* (THIRTY_DAYS)); return (from + randomSpanMilis); } public long randomFriendRequestedDate(UserProfile user1, UserProfile user2){ @@ -299,23 +247,23 @@ public long randomFriendRequestedDate(ReducedUserProfile user1, ReducedUserProfi } public long randomFriendApprovedDate(long requestedDate){ - long randomSpanMilis = (long) 
(sevenDayRanGen.nextDouble()* (sevenDayInMillis)); + long randomSpanMilis = (long) (sevenDayRanGen.nextDouble()* (SEVEN_DAYS)); return (requestedDate + randomSpanMilis); } public long randomFriendDeclinedDate(long requestedDate){ - long randomSpanMilis = (long) (sevenDayRanGen.nextDouble()* (sevenDayInMillis)); + long randomSpanMilis = (long) (sevenDayRanGen.nextDouble()* (SEVEN_DAYS)); return (requestedDate + randomSpanMilis); } public long randomFriendReapprovedDate(long declined){ - long randomSpanMilis = (long) (thirtyDayRanGen.nextDouble()* (thirtyDayInMillis)); + long randomSpanMilis = (long) (thirtyDayRanGen.nextDouble()* (THIRTY_DAYS)); return (declined + randomSpanMilis); } public long numberOfMonths(ReducedUserProfile user){ - return (to - user.getCreatedDate())/thirtyDayInMillis; + return (to - user.getCreatedDate())/THIRTY_DAYS; } public long numberOfMonths(long fromDate){ - return (to - fromDate)/thirtyDayInMillis; + return (to - fromDate)/THIRTY_DAYS; } public long randomPostCreatedDate(ReducedUserProfile user){ @@ -370,19 +318,11 @@ public long powerlawCommentCreatDate(long lastCommentCreatedDate){ } public long powerlawCommDateDay(long lastCommentCreatedDate){ - long createdDate = (long)(disGen.getDouble() * oneDayInMillis+lastCommentCreatedDate); + long createdDate = (long)(disGen.getDouble() * ONE_DAY+lastCommentCreatedDate); return createdDate; } - - // Assume that users are of 10 to 70 years' old - // Randomly select a long value in this range, then, minus this value from user's created date - /* - public long getBirthDay(long userCreatedDate){ - long age = (long)(ranGen.nextDouble() * thirtyYear + tenYear); - return (userCreatedDate - age); - } - */ + // The birthday is fixed during 1980 --> 1990 public long getBirthDay(long userCreatedDate){ long date = (long)(ranGen.nextDouble()*(toBirthDay -fromBirthDay)+fromBirthDay); @@ -399,30 +339,30 @@ public int getBirthYear(long birthDay){ public long getClassYear(long userCreatedDate, long birthday){ 
long age; - long graduateage = (ranClassYear.nextInt(5) + 20) * oneYear; + long graduateage = (ranClassYear.nextInt(5) + 20) * ONE_YEAR; if (birthday != -1){ return (long)(birthday + graduateage); } else{ - age = (long)(ranGen.nextDouble() * thirtyYear + tenYear); + age = (long)(ranGen.nextDouble() * THIRTY_YEARS + TEN_YEARS); return (userCreatedDate - age + graduateage); } } public long getWorkFromYear(long userCreatedDate, long birthday){ long age; - long workingage = (ranClassYear.nextInt(10) + 25) * oneYear; + long workingage = (ranClassYear.nextInt(10) + 25) * ONE_YEAR; if (birthday != -1){ return (long)(birthday + workingage); } else{ - age = (long)(ranGen.nextDouble() * thirtyYear + tenYear); + age = (long)(ranGen.nextDouble() * THIRTY_YEARS + TEN_YEARS); return (userCreatedDate - age + workingage); } } public long getWorkFromYear(long classYear){ - return (classYear + (long)(ranWorkingYear.nextDouble()*twoYear)); + return (classYear + (long)(ranWorkingYear.nextDouble()*TWO_YEARS)); } public long getStartDateTime(){ diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java index 86efcbc86..a9bedb830 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Iterator; import java.util.Random; import java.util.Vector; @@ -123,7 +123,7 @@ public void generateAllGPSForAllEvents(StreamStoreManager storeMng){ for (int i = 0; i < eventSet.size(); i++){ int numAttendedUsers = randNumUser.nextInt(100) + 20; //int lastUserId = 0; - TreeSet attendedUsers = new TreeSet(); + HashSet attendedUsers = new HashSet(numAttendedUsers); while (attendedUsers.size() < numAttendedUsers){ //int step = 
randNumUser.nextInt(totalNumUsers - numAttendedUsers- lastUserId + j); //lastUserId = lastUserId + step + j; diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java index 70d69d205..693826602 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java @@ -36,39 +36,41 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Iterator; import java.util.Random; +import java.util.Vector; import ldbc.socialnet.dbgen.dictionary.LocationDictionary; import ldbc.socialnet.dbgen.dictionary.TagDictionary; import ldbc.socialnet.dbgen.objects.Friend; import ldbc.socialnet.dbgen.objects.Group; import ldbc.socialnet.dbgen.objects.GroupMemberShip; -import ldbc.socialnet.dbgen.objects.IP; import ldbc.socialnet.dbgen.objects.ReducedUserProfile; import ldbc.socialnet.dbgen.objects.UserExtraInfo; public class GroupGenerator { static int groupId = 0; + static int forumId; + DateGenerator dateGenerator; LocationDictionary locationDic; TagDictionary tagDic; - static int forumId; Random randGroupInterest; - public GroupGenerator(DateGenerator _dateGenerator, LocationDictionary _locationDic, + public GroupGenerator(DateGenerator dateGenerator, LocationDictionary locationDic, TagDictionary tagDic, int numUsers, long seed){ - this.dateGenerator = _dateGenerator; - this.locationDic = _locationDic; + this.dateGenerator = dateGenerator; + this.locationDic = locationDic; this.tagDic = tagDic; - this.forumId = numUsers * 2 + 1; + + GroupGenerator.forumId = numUsers * 2 + 1; randGroupInterest = new Random(seed); } public void setForumId(int forumId) { - this.forumId = forumId; + GroupGenerator.forumId = forumId; } public Group createGroup(ReducedUserProfile user){ @@ -85,7 +87,7 @@ 
public Group createGroup(ReducedUserProfile user){ //Use the user location for group locationIdx group.setLocationIdx(user.getLocationIdx()); - TreeSet tagSet = user.getSetOfTags(); + HashSet tagSet = user.getSetOfTags(); Iterator iter = tagSet.iterator(); int idx = randGroupInterest.nextInt(tagSet.size()); for (int i = 0; i < idx; i++){ @@ -99,7 +101,7 @@ public Group createGroup(ReducedUserProfile user){ tags[0] = interestIdx; //Set name of group - group.setGroupName("Group for " + tagDic.getTagsNamesMapping().get(interestIdx).replace("\"","\\\"") + " in " + locationDic.getLocationName(group.getLocationIdx())); + group.setGroupName("Group for " + tagDic.getName(interestIdx).replace("\"","\\\"") + " in " + locationDic.getLocationName(group.getLocationIdx())); group.setTags(tags); @@ -109,7 +111,9 @@ public Group createGroup(ReducedUserProfile user){ public Group createAlbum(ReducedUserProfile user, UserExtraInfo extraInfo, int numAlbum, Random rand, double memberProb) { Group group = createGroup(user); group.setCreatedDate(dateGenerator.randomPhotoAlbumCreatedDate(user)); - group.setLocationIdx(rand.nextInt(locationDic.getVecLocations().size())); + Vector countries = locationDic.getCountries(); + int random = rand.nextInt(countries.size()); + group.setLocationIdx(countries.get(random)); group.setGroupName("Album " + numAlbum + " of " + extraInfo.getFirstName() + " " + extraInfo.getLastName()); Friend[] friends = user.getFriendList(); group.initAllMemberships(user.getNumFriendsAdded()); diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRWriter.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRWriter.java index 5a7656423..b60476a82 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRWriter.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRWriter.java @@ -42,7 +42,6 @@ import ldbc.socialnet.dbgen.objects.ReducedUserProfile; import 
org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/OutputDataWriter.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/OutputDataWriter.java deleted file mode 100644 index 44da6a439..000000000 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/OutputDataWriter.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2013 LDBC - * Linked Data Benchmark Council (http://ldbc.eu) - * - * This file is part of ldbc_socialnet_dbgen. - * - * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with ldbc_socialnet_dbgen. If not, see . - * - * Copyright (C) 2011 OpenLink Software - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; only Version 2 of the License dated - * June 1991. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -package ldbc.socialnet.dbgen.generator; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.ObjectOutputStream; -import java.io.Writer; - -import ldbc.socialnet.dbgen.dictionary.LocationDictionary; -import ldbc.socialnet.dbgen.dictionary.NamesDictionary; - - -public class OutputDataWriter { - private static String outputDirectory = "td_data"; - private static String experimentDirectory = "experiment"; - private static String groupDataFilename = "gr.dat"; - private static String generalDataFilename = "general.dat"; - private static String userNameDataFilename = "names.dat"; - private static String locationDataFilename = "loc.dat"; - private static String userDataFilename = "users.dat"; - private static String socialDegreeFileName = "socialDegree"; - private static String clustCoefficientFileName = "clusteringCoef"; - - ObjectOutputStream userDataOutput; - File outputDir; - File experimentOutputDir; - - public OutputDataWriter(){ - outputDir = new File(outputDirectory); - outputDir.mkdirs(); - experimentOutputDir = new File(experimentDirectory); - experimentOutputDir.mkdirs(); - } - protected void initWritingUserData(){ - File userDataFile = new File(outputDir, userDataFilename); - try { - userDataOutput = new ObjectOutputStream(new FileOutputStream(userDataFile, false)); - } catch(IOException e) { - System.err.println("Could not open or create file " + userDataFile.getAbsolutePath()); - System.err.println(e.getMessage()); - System.exit(-1); - } - - } - protected void writeUserData(int userId, int numOfFriend){ - try { - userDataOutput.writeInt(userId); - userDataOutput.writeInt(numOfFriend); - } catch (IOException e) { - 
e.printStackTrace(); - } - } - protected void finishWritingUserData(){ - try { - userDataOutput.writeInt(-1); - userDataOutput.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - protected void writeGeneralDataForTestDriver(int numtotalUser, DateGenerator dateTimeGenerator){ - File generalDataFile = new File(outputDir, generalDataFilename); - ObjectOutputStream generalDataOutput; - try { - generalDataFile.createNewFile(); - generalDataOutput = new ObjectOutputStream(new FileOutputStream(generalDataFile, false)); - generalDataOutput.writeInt(numtotalUser); - generalDataOutput.writeLong(dateTimeGenerator.getCurrentDateTime()); - generalDataOutput.close(); - } catch(IOException e) { - System.err.println("Could not open or create file " + generalDataFile.getAbsolutePath()); - System.err.println(e.getMessage()); - System.exit(-1); - } - } - - protected void writeGroupDataForTestDriver(GroupGenerator groupGenerator){ - File groupDataFile = new File(outputDir, groupDataFilename); - ObjectOutputStream groupDataOutput; - try { - groupDataFile.createNewFile(); - groupDataOutput = new ObjectOutputStream(new FileOutputStream(groupDataFile, false)); - groupDataOutput.writeInt(groupGenerator.groupId); - groupDataOutput.close(); - } catch(IOException e) { - System.err.println("Could not open or create file " + groupDataFile.getAbsolutePath()); - System.err.println(e.getMessage()); - System.exit(-1); - } - } - - protected void writeLocationDataForTestDriver(LocationDictionary locationDic){ - File locationDataFile = new File(outputDir, locationDataFilename); - ObjectOutputStream locationDataOutput; - try { - locationDataFile.createNewFile(); - locationDataOutput = new ObjectOutputStream(new FileOutputStream(locationDataFile, false)); - locationDataOutput.writeObject(locationDic.getVecLocations()); - - locationDataOutput.close(); - } catch(IOException e) { - System.err.println("Could not open or create file " + locationDataFile.getAbsolutePath()); - 
System.err.println(e.getMessage()); - System.exit(-1); - } - } - - protected void writeNamesDataForTestDriver(NamesDictionary namesDictionary){ - File namesDataFile = new File(outputDir, userNameDataFilename); - ObjectOutputStream namesDataOutput; - try { - namesDataFile.createNewFile(); - namesDataOutput = new ObjectOutputStream(new FileOutputStream(namesDataFile, false)); - namesDataOutput.writeObject(namesDictionary.getSurNamesByLocations()); - - namesDataOutput.close(); - } catch(IOException e) { - System.err.println("Could not open or create file " + namesDataFile.getAbsolutePath()); - System.err.println(e.getMessage()); - System.exit(-1); - } - - } - - protected void writeSocialDegree(int[] socialDegrees, int numOfuser){ - try { - Writer socialDegreeOutput = null; - File file = new File(experimentOutputDir, socialDegreeFileName + numOfuser + ".dat"); - socialDegreeOutput = new BufferedWriter(new FileWriter(file)); - - for (int i = 0; i < socialDegrees.length; i++){ - socialDegreeOutput.write(i + " " + socialDegrees[i]); - socialDegreeOutput.write("\n"); - } - socialDegreeOutput.close(); - - } catch (IOException e) { - e.printStackTrace(); - } - - } - - protected void writeClusteringCoefficient(double[] coefficient, int[] socialdegree, int numOfuser){ - try { - Writer clusteringCoefOutput = null; - File file = new File(experimentOutputDir, clustCoefficientFileName + numOfuser + ".dat"); - clusteringCoefOutput = new BufferedWriter(new FileWriter(file)); - - for (int i = 1; i < coefficient.length; i++){ - if (socialdegree[i] == 0){ - continue; - } - clusteringCoefOutput.write(i + " " + (double)(coefficient[i]/(double)socialdegree[i])); - clusteringCoefOutput.write("\n"); - } - clusteringCoefOutput.close(); - - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java index 
7640b9f24..8354fa2aa 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java @@ -36,15 +36,13 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.TreeSet; -import java.util.Iterator; +import java.util.HashSet; import java.util.Random; -import java.util.Vector; +import ldbc.socialnet.dbgen.dictionary.LocationDictionary; import ldbc.socialnet.dbgen.dictionary.PopularPlacesDictionary; import ldbc.socialnet.dbgen.objects.Group; import ldbc.socialnet.dbgen.objects.GroupMemberShip; -import ldbc.socialnet.dbgen.objects.Location; import ldbc.socialnet.dbgen.objects.Photo; import ldbc.socialnet.dbgen.objects.PopularPlace; import ldbc.socialnet.dbgen.objects.ReducedUserProfile; @@ -53,7 +51,7 @@ public class PhotoGenerator { DateGenerator dateGenerator; - Vector vLocations; + LocationDictionary locationDic; PopularPlacesDictionary dicPopularPlaces; Random rand; Random randLikes; @@ -62,11 +60,11 @@ public class PhotoGenerator { Random randPopularPlacesId; double probPopularPlaces; - public PhotoGenerator(DateGenerator _dateGen, Vector _vLocations, + public PhotoGenerator(DateGenerator _dateGen, LocationDictionary locationDic, long _seed, PopularPlacesDictionary _dicPopularPlaces, double _probPopularPlaces){ this.dateGenerator = _dateGen; - this.vLocations = _vLocations; + this.locationDic = locationDic; rand = new Random(_seed); randLikes = new Random(_seed); this.dicPopularPlaces = _dicPopularPlaces; @@ -82,17 +80,15 @@ public Photo generatePhoto(ReducedUserProfile user, Group album, photo.setAlbumId(album.getForumWallId()); photo.setCreatorId(album.getModeratorId()); - int locationIdx = album.getLocationIdx(); + int locationId = album.getLocationIdx(); byte numPopularPlace = user.getNumPopularPlace(); - photo.setLocationIdx(locationIdx); - Location location = vLocations.get(locationIdx); + photo.setLocationIdx(locationId); if 
(numPopularPlace == 0){ - photo.setLocationName(location.getName()); - photo.setLatt(location.getLatt()); - photo.setLongt(location.getLongt()); - } - else{ + photo.setLocationName(locationDic.getLocationName(locationId)); + photo.setLatt(locationDic.getLatt(locationId)); + photo.setLongt(locationDic.getLongt(locationId)); + } else{ int popularPlaceId; PopularPlace popularPlace; if (randPopularPlaces.nextDouble() < probPopularPlaces){ @@ -103,22 +99,18 @@ public Photo generatePhoto(ReducedUserProfile user, Group album, photo.setLocationName(popularPlace.getName()); photo.setLatt(popularPlace.getLatt()); photo.setLongt(popularPlace.getLongt()); - } - - else{ + } else{ // Randomly select one places from Album location idx - popularPlaceId = dicPopularPlaces.getPopularPlace(locationIdx); + popularPlaceId = dicPopularPlaces.getPopularPlace(locationId); if (popularPlaceId != -1){ - popularPlace = dicPopularPlaces.getPopularPlace(locationIdx,popularPlaceId); + popularPlace = dicPopularPlaces.getPopularPlace(locationId, popularPlaceId); photo.setLocationName(popularPlace.getName()); - //System.out.println("Set popular place name " + photo.getLocationName()); photo.setLatt(popularPlace.getLatt()); photo.setLongt(popularPlace.getLongt()); - } - else{ - photo.setLocationName(location.getName()); - photo.setLatt(location.getLatt()); - photo.setLongt(location.getLongt()); + } else{ + photo.setLocationName(locationDic.getLocationName(locationId)); + photo.setLatt(locationDic.getLatt(locationId)); + photo.setLongt(locationDic.getLongt(locationId)); } } } @@ -129,7 +121,7 @@ public Photo generatePhoto(ReducedUserProfile user, Group album, //Assume that the photo are created one by one after 1 second from the creation of the album photo.setTakenTime(album.getCreatedDate() + 1000*(idxInAlbum+1)); - TreeSet tags = new TreeSet(); + HashSet tags = new HashSet(); // Iterator it = user.getSetOfTags().iterator(); // while (it.hasNext()) { // Integer value = it.next(); @@ -149,7 +141,7 @@ 
public Photo generatePhoto(ReducedUserProfile user, Group album, photo.setInterestedUserAccs(likes); long[] likeTimestamp = new long[likes.length]; for (int i = 0; i < likes.length; i++) { - likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.sevenDayInMillis+photo.getTakenTime()); + likeTimestamp[i] = (long)(rand.nextDouble()*DateGenerator.SEVEN_DAYS+photo.getTakenTime()); } photo.setInterestedUserAccsTimestamp(likeTimestamp); @@ -175,13 +167,5 @@ public int[] getFriendsLiked(Group album, int numOfLikes){ return friends; - } - - public Vector getvLocations() { - return vLocations; - } - public void setvLocations(Vector vLocations) { - this.vLocations = vLocations; - } - + } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PowerDistGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PowerDistGenerator.java index d8271e02d..03640f5b2 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PowerDistGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PowerDistGenerator.java @@ -37,54 +37,22 @@ package ldbc.socialnet.dbgen.generator; import java.util.Random; - import umontreal.iro.lecuyer.probdist.PowerDist; -import java.util.Arrays; - public class PowerDistGenerator { + private Random rand; private PowerDist powerDist; - private Random rand; - double a; - double b; + public PowerDistGenerator(double a, double b, double alpha, long seed){ - this.a = a; - this.b = b; - //powerDist = new PowerDist(alpha); - powerDist = new PowerDist(a, b, alpha); rand = new Random(seed); + powerDist = new PowerDist(a, b, alpha); } + public int getValue(){ - double randVal = powerDist.inverseF(rand.nextDouble()); - //return (int)(a + (b - a) * randVal); - return (int)randVal; + return (int) powerDist.inverseF(rand.nextDouble()); } public double getDouble(){ return powerDist.inverseF(rand.nextDouble()); } - - public static void main(String args[]){ - 
PowerDistGenerator pdg = new PowerDistGenerator(5.0, 50.0, 0.8 - , 80808080); - int[] arr = new int[400]; - for (int i = 0; i < 400; i ++){ - //System.out.println(pdg.getValue() + " "); - arr[i] = pdg.getValue(); - } - Arrays.sort(arr); - System.out.println(Arrays.toString(arr)); - - int j = 0; - int lastvalue = -1; - for (int i = 0; i < 400; i ++){ - if (lastvalue != arr[i]){ - System.out.println(lastvalue + " : " + j); - lastvalue = arr[i]; - j = 0; - } - j ++; - - } - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java index 012fc7b85..245565628 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java @@ -47,7 +47,7 @@ import java.io.ObjectOutputStream; import java.io.Writer; import java.util.GregorianCalendar; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Properties; import java.util.Random; import java.util.Vector; @@ -76,9 +76,10 @@ import ldbc.socialnet.dbgen.objects.UserExtraInfo; import ldbc.socialnet.dbgen.objects.UserProfile; import ldbc.socialnet.dbgen.serializer.CSV; +import ldbc.socialnet.dbgen.serializer.EmptySerializer; import ldbc.socialnet.dbgen.serializer.Serializer; import ldbc.socialnet.dbgen.serializer.Turtle; -import ldbc.socialnet.dbgen.storage.MFStoreManager; +import ldbc.socialnet.dbgen.serializer.Statistics; import ldbc.socialnet.dbgen.storage.StorageManager; import ldbc.socialnet.dbgen.vocabulary.SN; @@ -86,6 +87,9 @@ import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.hadoop.mapreduce.Reducer; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + public class ScalableGenerator{ public enum OrganisationType { @@ -141,6 +145,7 @@ public enum OrganisationType { private static final String 
DICTIONARY_DIRECTORY = "/dictionaries/"; private static final String IPZONE_DIRECTORY = "/ipaddrByCountries"; private static final String PARAMETERS_FILE = "params.ini"; + private static final String STATS_FILE = "testdata.json"; private String RDF_OUTPUT_FILE = "ldbc_socialnet_dbg"; //Not static or final: Will be modified to add the machine prefix // Dictionaries dataset files @@ -240,6 +245,9 @@ public enum OrganisationType { private final String FEMALE = "female"; private final String gender[] = { MALE, FEMALE}; + //Stat container + private Statistics stats; + // For sliding window int cellSize; // Number of user in one cell int numberOfCellPerWindow; @@ -267,7 +275,7 @@ public enum OrganisationType { int numCellRead = 0; int numCellPerfile; int numCellInLastFile; - TreeSet selectedFileIdx; + HashSet selectedFileIdx; // For friendship generation int friendshipNo; @@ -408,7 +416,6 @@ public enum OrganisationType { double probDiffIPforTraveller; // Writing data for test driver - OutputDataWriter outputDataWriter; int thresholdPopularUser = 40; int numPopularUser = 0; @@ -443,6 +450,7 @@ public ScalableGenerator(int mapreduceFileId, String sibOutputDir, String sibHom this.mapreduceFileIdx = mapreduceFileId; this.sibOutputDir = sibOutputDir; this.sibHomeDir = sibHomeDir; + this.stats = new Statistics(); System.out.println("Map Reduce File Idx is: " + mapreduceFileIdx); if (mapreduceFileIdx != -1){ @@ -561,7 +569,7 @@ private void loadParamsFromFile() { endYear = startYear + numYears; serializerType = properties.getProperty(SERIALIZER_TYPE); if (!serializerType.equals("ttl") && !serializerType.equals("n3") && - !serializerType.equals("csv")) { + !serializerType.equals("csv") && !serializerType.equals("none")) { throw new IllegalStateException("serializerType must be ttl, n3 or csv"); } } catch (Exception e) { @@ -647,17 +655,17 @@ private void _init(int mapId, boolean isFullLoad) { locationDic.init(); System.out.println("Building language dictionary "); - languageDic 
= new LanguageDictionary(languageDicFile, locationDic.getLocationNameMapping(), + languageDic = new LanguageDictionary(languageDicFile, locationDic, probEnglish, probSecondLang, seeds[11]); languageDic.init(); System.out.println("Building Tag dictionary "); mainTagDic = new TagDictionary(tagNamesFile, mainTagDicFile, tagClassFile, tagHierarchyFile, - locationDic.getLocationNameMapping().size(), seeds[5], tagCountryCorrProb); - mainTagDic.extractTags(); + locationDic.getCountries().size(), seeds[5], tagCountryCorrProb); + mainTagDic.initialize(); System.out.println("Building Tag-text dictionary "); - tagTextDic = new TagTextDictionary(tagTextFile, dateTimeGenerator, mainTagDic.getTagsNamesMapping(), + tagTextDic = new TagTextDictionary(tagTextFile, dateTimeGenerator, mainTagDic, minTextSize, maxTextSize, minCommentSize, maxCommentSize, ratioReduceText, seeds[15], seeds[16]); tagTextDic.initialize(); @@ -667,14 +675,14 @@ private void _init(int mapId, boolean isFullLoad) { System.out.println("Building IP addresses dictionary "); ipAddDictionary = new IPAddressDictionary(countryAbbrMappingFile, - IPZONE_DIRECTORY, locationDic.getVecLocations(), seeds[33], + IPZONE_DIRECTORY, locationDic, seeds[33], probDiffIPinTravelSeason, probDiffIPnotTravelSeason, probDiffIPforTraveller); - ipAddDictionary.init(); + ipAddDictionary.initialize(); System.out.println("Building Names dictionary"); namesDictionary = new NamesDictionary(surnamesDicFile, givennamesDicFile, - locationDic.getLocationNameMapping(), seeds[23]); + locationDic, seeds[23]); namesDictionary.init(); System.out.println("Building email dictionary"); @@ -697,7 +705,7 @@ private void _init(int mapId, boolean isFullLoad) { System.out.println("Building popular places dictionary"); popularDictionary = new PopularPlacesDictionary(popularPlacesDicFile, - locationDic.getLocationNameMapping(), seeds[46]); + locationDic, seeds[46]); popularDictionary.init(); System.out.println("Building user agents dictionary"); @@ -706,15 
+714,12 @@ private void _init(int mapId, boolean isFullLoad) { System.out.println("Building photo generator"); photoGenerator = new PhotoGenerator(dateTimeGenerator, - locationDic.getVecLocations(), seeds[17], - popularDictionary, probPopularPlaces); + locationDic, seeds[17], popularDictionary, probPopularPlaces); System.out.println("Building Group generator"); groupGenerator = new GroupGenerator(dateTimeGenerator, locationDic, mainTagDic, numtotalUser, seeds[35]); - - outputDataWriter = new OutputDataWriter(); serializer = getSerializer(serializerType, RDF_OUTPUT_FILE); } } @@ -726,8 +731,7 @@ private void _init(int mapId, boolean isFullLoad) { * @param inputFile The hadoop file with the user serialization (data and friends) * @param numCell The number of cells the generator will parse. */ - public void generateUserActivity(String inputFile, int numCell){ - startWritingUserData(); + public void generateUserActivity(String inputFile, int numCell) { long startPostGeneration = System.currentTimeMillis(); @@ -739,20 +743,17 @@ public void generateUserActivity(String inputFile, int numCell){ long endPostGeneration = System.currentTimeMillis(); System.out.println("Post generation takes " + getDurationInMinutes(startPostGeneration, endPostGeneration)); - finishWritingUserData(); - long startGroupGeneration = System.currentTimeMillis(); generateAllGroup(inputFile, numCell); long endGroupGeneration = System.currentTimeMillis(); System.out.println("Group generation takes " + getDurationInMinutes(startGroupGeneration, endGroupGeneration)); - serializer.serialize(); + serializer.close(); + writeStatistics(); - System.out.println("Number of generated triples " + serializer.triplesGenerated()); + System.out.println("Number of generated triples " + serializer.unitsGenerated()); System.out.println("Number of popular users " + numPopularUser); System.out.println("Writing the data for test driver "); - - writeDataForQGEN(); } /** @@ -834,7 +835,7 @@ public void generateGroups(int 
pass, int cellPos, int numCell) { Friend firstLevelFriends[] = reducedUserProfiles[curIdxInWindow].getFriendList(); Vector secondLevelFriends = new Vector(); - //TODO: Include friends of friends a.k.a second level friends. + //TODO: Include friends of friends a.k.a second level friends? int numGroup = randNumberGroup.nextInt(maxNumGroupCreatedPerUser); for (int j = 0; j < numGroup; j++) { @@ -881,10 +882,7 @@ public void pushUserProfile(ReducedUserProfile reduceUser, int pass, numUserForNewCell = 0; } } - - if (reduceUser != null){ - reduceUser = null; - } + reduceUser = null; } public void pushAllRemainingUser(int pass, Reducer.Context context, @@ -1114,7 +1112,25 @@ public void generatePosts(ReducedUserProfile user, UserExtraInfo extraInfo){ Post post = tagTextDic.createPost(user, maxNumLikes, userAgentDic, ipAddDictionary, browserDic); Integer languageIndex = randUniform.nextInt(extraInfo.getLanguages().size()); post.setLanguage(extraInfo.getLanguages().get(languageIndex)); - + + String countryName = locationDic.getLocationName((ipAddDictionary.getLocation(post.getIpAddress()))); + stats.countries.add(countryName); + + GregorianCalendar date = new GregorianCalendar(); + date.setTimeInMillis(post.getCreatedDate()); + String strCreationDate = DateGenerator.formatYear(date); + if (stats.maxPostCreationDate == null) { + stats.maxPostCreationDate = strCreationDate; + stats.minPostCreationDate = strCreationDate; + } else { + if (stats.maxPostCreationDate.compareTo(strCreationDate) < 0) { + stats.maxPostCreationDate = strCreationDate; + } + if (stats.minPostCreationDate.compareTo(strCreationDate) > 0) { + stats.minPostCreationDate = strCreationDate; + } + } + serializer.gatherData(post); // Generate comments @@ -1126,6 +1142,8 @@ public void generatePosts(ReducedUserProfile user, UserExtraInfo extraInfo){ Comment comment = tagTextDic.createComment(post, user, lastCommentCreatedDate, startCommentId, lastCommentId, userAgentDic, ipAddDictionary, browserDic); if 
(comment.getAuthorId() != -1) { + countryName = locationDic.getLocationName((ipAddDictionary.getLocation(comment.getIpAddress()))); + stats.countries.add(countryName); serializer.gatherData(comment); lastCommentCreatedDate = comment.getCreateDate(); @@ -1155,7 +1173,10 @@ public void generatePhoto(ReducedUserProfile user, UserExtraInfo extraInfo){ photo.setUserAgent(userAgentDic.getUserAgentName(user.isHaveSmartPhone(), user.getAgentIdx())); photo.setBrowserIdx(browserDic.getPostBrowserId(user.getBrowserIdx())); - ipAddDictionary.setPhotoIPAdress(user.isFrequentChange(), user.getIpAddress(), photo); + photo.setIpAddress(ipAddDictionary.getIP(user.getIpAddress(), + user.isFrequentChange(), photo.getTakenTime(), photo.getLocationIdx())); + String countryName = locationDic.getLocationName((ipAddDictionary.getLocation(photo.getIpAddress()))); + stats.countries.add(countryName); serializer.gatherData(photo); } @@ -1169,7 +1190,7 @@ public void createGroupForUser(ReducedUserProfile user, Group group = groupGenerator.createGroup(user); - TreeSet memberIds = new TreeSet(); + HashSet memberIds = new HashSet(); int numGroupMember = randNumberUserPerGroup.nextInt(maxNumMemberGroup); group.initAllMemberships(numGroupMember); @@ -1251,7 +1272,8 @@ public void generatePostForGroup(Group group) { // Integer languageIndex = randUniform.nextInt(extraInfo.getLanguages().size()); // post.setLanguage(extraInfo.getLanguages().get(languageIndex)); groupPost.setLanguage(-1); - + String countryName = locationDic.getLocationName((ipAddDictionary.getLocation(groupPost.getIpAddress()))); + stats.countries.add(countryName); serializer.gatherData(groupPost); @@ -1264,7 +1286,8 @@ public void generatePostForGroup(Group group) { Comment comment = tagTextDic.createComment(groupPost, group, lastCommentCreatedDate, startCommentId, lastCommentId, userAgentDic, ipAddDictionary, browserDic); if (comment.getAuthorId() != -1) { // In case the comment is not reated because of the friendship's createddate 
- + countryName = locationDic.getLocationName((ipAddDictionary.getLocation(comment.getIpAddress()))); + stats.countries.add(countryName); serializer.gatherData(comment); lastCommentCreatedDate = comment.getCreateDate(); @@ -1368,7 +1391,7 @@ public UserProfile generateGeneralInformation(int accountId) { // source IP userProf.setIpAddress(ipAddDictionary - .getRandomIPAddressFromLocation(userProf.getLocationIdx())); + .getRandomIPFromLocation(userProf.getLocationIdx())); // Popular places byte numPopularPlaces = (byte) randNumPopularPlaces.nextInt(maxNumPopularPlaces + 1); @@ -1392,16 +1415,12 @@ public UserProfile generateGeneralInformation(int accountId) { public void setInfoFromUserProfile(ReducedUserProfile user, UserExtraInfo userExtraInfo) { - + // The country will be always present, but the city can be missing if that data is // not available on the dictionary int locationId = (user.getCityIdx() != -1) ? user.getCityIdx() : user.getLocationIdx(); userExtraInfo.setLocationId(locationId); - if (user.getCityIdx() != -1) { - userExtraInfo.setLocation(locationDic.getCityName(user.getCityIdx())); - } else { - userExtraInfo.setLocation(locationDic.getLocationName(user.getLocationIdx())); - } + userExtraInfo.setLocation(locationDic.getLocationName(locationId)); // We consider that the distance from where user is living and double distance = randomExactLongLat.nextDouble() * 2; @@ -1461,7 +1480,7 @@ public void setInfoFromUserProfile(ReducedUserProfile user, user.getLocationIdx(),isMale, dateTimeGenerator.getBirthYear(user.getBirthDay()))); - userExtraInfo.setLastName(namesDictionary.getRandomSurName(user.getLocationIdx())); + userExtraInfo.setLastName(namesDictionary.getRandomSurname(user.getLocationIdx())); // email is created by using the user's first name + userId int numEmails = randomExtraInfo.nextInt(maxEmails) + 1; @@ -1488,12 +1507,30 @@ public void setInfoFromUserProfile(ReducedUserProfile user, prob = randomExtraInfo.nextDouble(); if (prob >= missingRatio) 
{ for (int i = 0; i < numCompanies; i++) { + long workFrom; if (userExtraInfo.getClassYear() != -1) { - long workFrom = dateTimeGenerator.getWorkFromYear(user.getCreatedDate(), user.getBirthDay()); - userExtraInfo.addCompany(companiesDictionary.getRandomCompany(user.getLocationIdx()), workFrom); + workFrom = dateTimeGenerator.getWorkFromYear(user.getCreatedDate(), user.getBirthDay()); + } else { + workFrom = dateTimeGenerator.getWorkFromYear(userExtraInfo.getClassYear()); + } + String company = companiesDictionary.getRandomCompany(user.getLocationIdx()); + userExtraInfo.addCompany(company, workFrom); + String countryName = locationDic.getLocationName(companiesDictionary.getCountry(company)); + stats.countries.add(countryName); + + GregorianCalendar date = new GregorianCalendar(); + date.setTimeInMillis(workFrom); + String strWorkFrom = DateGenerator.formatYear(date); + if (stats.maxWorkFrom == null) { + stats.maxWorkFrom = strWorkFrom; + stats.minWorkFrom = strWorkFrom; } else { - long workFrom = dateTimeGenerator.getWorkFromYear(userExtraInfo.getClassYear()); - userExtraInfo.addCompany(companiesDictionary.getRandomCompany(user.getLocationIdx()), workFrom); + if (stats.maxWorkFrom.compareTo(strWorkFrom) < 0) { + stats.maxWorkFrom = strWorkFrom; + } + if (stats.minWorkFrom.compareTo(strWorkFrom) > 0) { + stats.minWorkFrom = strWorkFrom; + } } } } @@ -1505,13 +1542,25 @@ public void setInfoFromUserProfile(ReducedUserProfile user, userLanguages.add(internationalLang); } userExtraInfo.setLanguages(userLanguages); - - // write user data for test driver - if (user.getNumFriendsAdded() > thresholdPopularUser) { - outputDataWriter.writeUserData(user.getAccountId(), - user.getNumFriendsAdded()); - numPopularUser++; - } + + + stats.maxPersonId = Math.max(stats.maxPersonId, user.getAccountId()); + stats.minPersonId = Math.min(stats.minPersonId, user.getAccountId()); + stats.firstNames.add(userExtraInfo.getFirstName()); + String countryName = 
locationDic.getLocationName(user.getLocationIdx()); + stats.countries.add(countryName); + + // NOTE: [2013-08-06] The tags of posts, forums, etc.. all depend of the user ones + // if in the future this fact change add those in the statistics also. + HashSet tags = user.getSetOfTags(); + for (Integer tagID : tags) { + stats.tagNames.add(mainTagDic.getName(tagID)); + Integer parent = mainTagDic.getTagClass(tagID); + while (parent != -1) { + stats.tagClasses.add(mainTagDic.getClassName(parent)); + parent = mainTagDic.getClassParent(parent); + } + } } @@ -1564,75 +1613,46 @@ public void updateLastPassFriendAdded(int from, int to, int pass) { } private Serializer getSerializer(String type, String outputFileName) { - SN.setMachineNumber(machineId); + SN.setMachineNumber(machineId, numFiles); String t = type.toLowerCase(); if (t.equals("ttl")) { - return new Turtle(sibOutputDir + outputFileName, forwardChaining, - numRdfOutputFile, true, mainTagDic, - browserDic.getvBrowser(), companiesDictionary.getCompanyLocationMap(), + return new Turtle(sibOutputDir + outputFileName, numRdfOutputFile, true, mainTagDic, + browserDic, companiesDictionary, organizationsDictionary.GetOrganizationLocationMap(), ipAddDictionary, locationDic, languageDic); } else if (t.equals("n3")) { - return new Turtle(sibOutputDir + outputFileName, forwardChaining, - numRdfOutputFile, false, mainTagDic, - browserDic.getvBrowser(), companiesDictionary.getCompanyLocationMap(), + return new Turtle(sibOutputDir + outputFileName, numRdfOutputFile, false, mainTagDic, + browserDic, companiesDictionary, organizationsDictionary.GetOrganizationLocationMap(), ipAddDictionary, locationDic, languageDic); } else if (t.equals("csv")) { - return new CSV(sibOutputDir /*+ outputFileName*/, forwardChaining, - numRdfOutputFile, mainTagDic, - browserDic.getvBrowser(), companiesDictionary.getCompanyLocationMap(), + return new CSV(sibOutputDir, numRdfOutputFile, mainTagDic, + browserDic, companiesDictionary, 
organizationsDictionary.GetOrganizationLocationMap(), ipAddDictionary,locationDic, languageDic); - } else { + } else if (t.equals("none")) { + return new EmptySerializer(); + } else { System.err.println("Unexpected Serializer - Aborting"); System.exit(-1); return null; } } - private void startWritingUserData() { - outputDataWriter.initWritingUserData(); - } - - private void finishWritingUserData() { - outputDataWriter.finishWritingUserData(); - } - - private void writeDataForQGEN() { - outputDataWriter.writeGeneralDataForTestDriver(numtotalUser, dateTimeGenerator); - outputDataWriter.writeGroupDataForTestDriver(groupGenerator); - outputDataWriter.writeLocationDataForTestDriver(locationDic); - outputDataWriter.writeNamesDataForTestDriver(namesDictionary); + private void writeStatistics() { + Gson gson = new GsonBuilder().setExclusionStrategies(stats.getExclusion()).disableHtmlEscaping().create(); + FileWriter writer; + try { + stats.makeCountryPairs(locationDic); + writer = new FileWriter(sibOutputDir + "m" + machineId + STATS_FILE); + writer.append(gson.toJson(stats)); + writer.flush(); + writer.close(); + } catch (IOException e) { + System.err.println("Unable to write stastistics"); + System.err.println(e.getMessage()); + e.printStackTrace(); + } } - - public void writeToOutputFile(String filenames[], String outputfile){ - Writer output = null; - File file = new File(outputfile); - try { - output = new BufferedWriter(new FileWriter(file)); - for (int i = 0; i < (filenames.length - 1); i++) { - output.write(filenames[i] + " " + numCellPerfile + "\n"); - } - - output.write(filenames[filenames.length - 1] + " " + numCellInLastFile + "\n"); - - output.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public void printHeapSize(){ - long heapSize = Runtime.getRuntime().totalMemory(); - long heapMaxSize = Runtime.getRuntime().maxMemory(); - long heapFreeSize = Runtime.getRuntime().freeMemory(); - - System.out.println(" ---------------------- "); - 
System.out.println(" Current Heap Size: " + heapSize/(1024*1024)); - System.out.println(" Max Heap Size: " + heapMaxSize/(1024*1024)); - System.out.println(" Free Heap Size: " + heapFreeSize/(1024*1024)); - System.out.println(" ---------------------- "); - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Friend.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Friend.java index 3ab595471..d8263ccf7 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Friend.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Friend.java @@ -42,6 +42,7 @@ import java.io.Serializable; public class Friend implements Serializable{ + private static final long serialVersionUID = 3657773293974543890L; int friendAcc; long createdTime; //approved Time long requestTime; @@ -81,11 +82,9 @@ public void readFields(DataInput arg0) throws IOException{ agentIdx = arg0.readByte(); browserIdx = arg0.readByte(); isFrequentChange = arg0.readBoolean(); - short ip1 = arg0.readShort(); - short ip2 = arg0.readShort(); - short ip3 = arg0.readShort(); - short ip4 = arg0.readShort(); - this.sourceIp = new IP(ip1, ip2, ip3, ip4); + int ip = arg0.readInt(); + int mask = arg0.readInt(); + this.sourceIp = new IP(ip, mask); } public void write(DataOutput arg0) throws IOException { arg0.writeInt(friendAcc); @@ -99,11 +98,8 @@ public void write(DataOutput arg0) throws IOException { arg0.writeByte(agentIdx); arg0.writeByte(browserIdx); arg0.writeBoolean(isFrequentChange); - arg0.writeShort(sourceIp.getIp1()); - arg0.writeShort(sourceIp.getIp2()); - arg0.writeShort(sourceIp.getIp3()); - arg0.writeShort(sourceIp.getIp4()); - + arg0.writeInt(sourceIp.getIp()); + arg0.writeInt(sourceIp.getMask()); } public Friend(ReducedUserProfile user, long _requestedTime, long _declinedTime, long _createdTime, diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/GPS.java 
b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/GPS.java index 853b868f4..6c382dd6b 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/GPS.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/GPS.java @@ -39,6 +39,7 @@ import java.io.Serializable; public class GPS implements Serializable{ + private static final long serialVersionUID = 3657773293974543890L; long trackedTime; String trackedLocation; double longt; diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/IP.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/IP.java index 18d5cf154..6d5b68c78 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/IP.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/IP.java @@ -36,32 +36,52 @@ */ package ldbc.socialnet.dbgen.objects; -import java.io.Serializable; +public class IP { + + public static final int BYTE_MASK = 0xFF; + public static final int BYTE_SIZE = 8; + public static final int IP4_SIZE_BITS = 32; + public static final int IP4_SIZE_BYTES = IP4_SIZE_BITS / 8;; + public static final int BYTE1_SHIFT_POSITION = 24; + public static final int BYTE2_SHIFT_POSITION = 16; + public static final int BYTE3_SHIFT_POSITION = 8; + + int ip; + int mask; + + public IP(int byte1, int byte2, int byte3, int byte4, int networkMask) { + ip = ((byte1 & BYTE_MASK) << BYTE1_SHIFT_POSITION) | + ((byte2 & BYTE_MASK) << BYTE2_SHIFT_POSITION) | + ((byte3 & BYTE_MASK) << BYTE3_SHIFT_POSITION) | + (byte4 & BYTE_MASK); + + mask = (networkMask == IP4_SIZE_BITS) ? 
0 : 1; + for (int k = networkMask+1; k < IP4_SIZE_BITS; k++) { + mask = mask | mask << 1; + } + } -public class IP implements Serializable { - short ip1; - short ip2; - public short getIp1() { - return ip1; + public IP(int ip, int mask){ + this.ip = ip; + this.mask = mask; } - public short getIp2() { - return ip2; + + public int getIp() { + return ip; } - public short getIp3() { - return ip3; - } - public short getIp4() { - return ip4; - } - short ip3; - short ip4; - public IP(short _ip1, short _ip2, short _ip3, short _ip4){ - this.ip1 = _ip1; - this.ip2 = _ip2; - this.ip3 = _ip3; - this.ip4 = _ip4; + + public int getMask() { + return mask; + } + + public boolean belongsToMyNetwork(IP ip) { + return (mask == ip.mask) && ((this.ip & ~this.mask) == (ip.ip & ~ip.mask)); } + public String toString(){ - return ip1 + "." + ip2 + "." + ip3 + "." + ip4; + return ((ip >>> BYTE1_SHIFT_POSITION) & BYTE_MASK) + "." + + ((ip >>> BYTE2_SHIFT_POSITION) & BYTE_MASK) + "." + + ((ip >>> BYTE3_SHIFT_POSITION) & BYTE_MASK) + "." 
+ + (ip & BYTE_MASK); } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java index bd5beaf12..3970c615d 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.objects; -import java.util.TreeSet; +import java.util.HashSet; public class Photo { long photoId; @@ -48,7 +48,7 @@ public class Photo { double latt; double longt; long takenTime; - TreeSet tags; + HashSet tags; int[] interestedUserAccs; long[] interestedUserAccsTimestamp; @@ -114,10 +114,10 @@ public long getTakenTime() { public void setTakenTime(long takenTime) { this.takenTime = takenTime; } - public TreeSet getTags() { + public HashSet getTags() { return tags; } - public void setTags(TreeSet tags) { + public void setTags(HashSet tags) { this.tags = tags; } public double getLatt() { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PhotoStream.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PhotoStream.java deleted file mode 100644 index 47dcc26ea..000000000 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PhotoStream.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2013 LDBC - * Linked Data Benchmark Council (http://ldbc.eu) - * - * This file is part of ldbc_socialnet_dbgen. - * - * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with ldbc_socialnet_dbgen. If not, see . - * - * Copyright (C) 2011 OpenLink Software - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; only Version 2 of the License dated - * June 1991. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -package ldbc.socialnet.dbgen.objects; - -import java.io.Serializable; -import java.util.TreeSet; - -public class PhotoStream implements Serializable{ - boolean isPhoto = true; - long photoId; - String image; - - long albumId; - int locationIdx; - int creatorId; // Id of user's account - String locationName; - double latt; - double longt; - //long takenTime; // Replaced by createdDate - TreeSet tags; - int[] interestedUserAccs; - - IP ipAddress; - String userAgent; // Send from where e.g., iPhone, Samsung, HTC - - byte browserIdx; - - String title; - long createdDate; - - public PhotoStream(){} - public PhotoStream(Photo photo){ - isPhoto = true; - this.photoId = photo.getPhotoId(); - this.image = photo.getImage(); - this.albumId = photo.getAlbumId(); - this.locationIdx = photo.getLocationIdx(); - this.creatorId = photo.getCreatorId(); // Id of user's account - this.locationName = photo.getLocationName(); - this.latt = photo.getLatt(); - this.longt = photo.getLongt(); - this.createdDate = photo.getTakenTime(); - 
this.tags = photo.getTags(); - this.interestedUserAccs = photo.getInterestedUserAccs(); - this.ipAddress = photo.getIpAddress(); - this.userAgent = photo.getUserAgent(); - this.browserIdx = photo.getBrowserIdx(); - } - public Photo getPhoto(){ - Photo photo = new Photo(); - photo.setPhotoId(photoId); - photo.setImage(image); - photo.setAlbumId(albumId); - photo.setLocationIdx(locationIdx); - photo.setCreatorId(creatorId); // Id of user's account - photo.setLocationName(locationName); - photo.setLatt(latt); - photo.setLongt(longt); - photo.setTakenTime(createdDate); - photo.setTags(tags); - photo.setInterestedUserAccs(interestedUserAccs); - photo.setIpAddress(ipAddress); - photo.setUserAgent(userAgent); - photo.setBrowserIdx(browserIdx); - - return photo; - } - - public long getCreatedDate() { - return createdDate; - } - public void setCreatedDate(long createdDate) { - this.createdDate = createdDate; - } - public long getPhotoId() { - return photoId; - } - public void setPhotoId(long photoId) { - this.photoId = photoId; - } - public void setImage(String image) { - this.image = image; - } - public String getImage() { - return image; - } - public long getAlbumId() { - return albumId; - } - public void setAlbumId(long albumId) { - this.albumId = albumId; - } - public int getLocationIdx() { - return locationIdx; - } - public void setLocationIdx(int locationIdx) { - this.locationIdx = locationIdx; - } - public int getCreatorId() { - return creatorId; - } - public void setCreatorId(int creatorId) { - this.creatorId = creatorId; - } - public String getLocationName() { - return locationName; - } - public void setLocationName(String locationName) { - this.locationName = locationName; - } - public double getLatt() { - return latt; - } - public void setLatt(double latt) { - this.latt = latt; - } - public double getLongt() { - return longt; - } - public void setLongt(double longt) { - this.longt = longt; - } - public TreeSet getTags() { - return tags; - } - public void 
setTags(TreeSet tags) { - this.tags = tags; - } - public int[] getInterestedUserAccs() { - return interestedUserAccs; - } - public void setInterestedUserAccs(int[] interestedUserAccs) { - this.interestedUserAccs = interestedUserAccs; - } - public IP getIpAddress() { - return ipAddress; - } - public void setIpAddress(IP ipAddress) { - this.ipAddress = ipAddress; - } - public String getUserAgent() { - return userAgent; - } - public void setUserAgent(String userAgent) { - this.userAgent = userAgent; - } - public byte getBrowserIdx() { - return browserIdx; - } - public void setBrowserIdx(byte browserIdx) { - this.browserIdx = browserIdx; - } - public String getTitle() { - return title; - } - public void setTitle(String title) { - this.title = title; - } - public boolean isPhoto() { - return isPhoto; - } - public void setPhoto(boolean isPhoto) { - this.isPhoto = isPhoto; - } - -} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java index 13de72b86..f67dc7e3a 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.objects; -import java.util.TreeSet; +import java.util.HashSet; public class Post { long postId; @@ -50,7 +50,7 @@ public class Post { int articleIdx; // Index of articles in the set of same region/interest article int interestIdx; // Index of one interest in user's interests - TreeSet tags; + HashSet tags; int interestedUserAccs[]; //List of users who are interested in the post long interestedUserAccsTimestamp[]; @@ -73,10 +73,10 @@ public void setInterestIdx(int interestIdx) { this.interestIdx = interestIdx; } - public TreeSet getTags() { + public HashSet getTags() { return tags; } - public void setTags(TreeSet tags) { + public void setTags(HashSet tags) { this.tags = tags; } public int[] 
getInterestedUserAccs() { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PostStream.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PostStream.java deleted file mode 100644 index c563fc8ac..000000000 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/PostStream.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2013 LDBC - * Linked Data Benchmark Council (http://ldbc.eu) - * - * This file is part of ldbc_socialnet_dbgen. - * - * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with ldbc_socialnet_dbgen. If not, see . - * - * Copyright (C) 2011 OpenLink Software - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; only Version 2 of the License dated - * June 1991. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ -package ldbc.socialnet.dbgen.objects; - -import java.io.Serializable; -import java.util.TreeSet; - -public class PostStream implements Serializable{ - boolean isaPost = true; - - long postId; - String title; - String content; - long createdDate; - int authorId; - int forumId; - - int articleIdx; // Index of articles in the set of same region/interest article - int interestIdx; // Index of one interest in user's interests - - TreeSet tags; - int interestedUserAccs[]; //List of users who are interested in the post - long interestedUserAccsTimestamp[]; - - IP ipAddress; - String userAgent; // Send from where e.g., iPhone, Samsung, HTC - - byte browserIdx; // Set browser Idx - - boolean isInterestPost; //Only use for group's post - - long commentId; - - long reply_of; //Id of the parent post/comment of this comment - - - public PostStream(){} - public PostStream(Post post){ - isaPost = true; - postId = post.getPostId(); - title = post.getTitle(); - content = post.getContent(); - createdDate = post.getCreatedDate(); - authorId = post.getAuthorId(); - forumId = post.getForumId(); - articleIdx = post.getArticleIdx(); - interestIdx = post.getInterestIdx(); - - tags = post.getTags(); - interestedUserAccs = post.getInterestedUserAccs(); - ipAddress = post.getIpAddress(); - userAgent = post.getUserAgent(); - browserIdx = post.getBrowserIdx(); - isInterestPost = post.isInterestPost(); - } - - public PostStream(Comment comment){ - isaPost = false; - postId = comment.getPostId(); - content = comment.getContent(); - createdDate = comment.getCreateDate(); - authorId = comment.getAuthorId(); - forumId = comment.getForumId(); - - ipAddress = comment.getIpAddress(); - userAgent = comment.getUserAgent(); - browserIdx = comment.getBrowserIdx(); - - commentId = comment.getCommentId(); - reply_of = comment.getReply_of(); - } - - public Comment getComment(){ - Comment comment = new Comment(); - comment.setPostId(postId); - comment.setContent(content); - 
comment.setCreateDate(createdDate); - comment.setAuthorId(authorId); - comment.setForumId(forumId); - comment.setIpAddress(ipAddress); - comment.setUserAgent(userAgent); - comment.setBrowserIdx(browserIdx); - comment.setCommentId(commentId); - comment.setReply_of(reply_of); - - return comment; - } - public Post getPost(){ - Post post = new Post(); - post.setPostId(postId); - post.setTitle(title); - post.setContent(content); - post.setCreatedDate(createdDate); - post.setAuthorId(authorId); - post.setForumId(forumId); - post.setArticleIdx(articleIdx); - post.setInterestIdx(interestIdx); - post.setTags(tags); - post.setIpAddress(ipAddress); - post.setUserAgent(userAgent); - post.setBrowserIdx(browserIdx); - post.setInterestedUserAccs(interestedUserAccs); - post.setInterestedUserAccsTimestamp(interestedUserAccsTimestamp); - post.setInterestPost(isInterestPost); - - return post; - } - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public long getCreatedDate() { - return createdDate; - } - - public void setCreatedDate(long createdDate) { - this.createdDate = createdDate; - } - public String getContent() { - return content; - } - public void setContent(String content) { - this.content = content; - } - public TreeSet getTags() { - return tags; - } - public void setTags(TreeSet tags) { - this.tags = tags; - } - public int[] getInterestedUserAccs() { - return interestedUserAccs; - } - public void setInterestedUserAccs(int[] interestedUserAccs) { - this.interestedUserAccs = interestedUserAccs; - } - public long[] getInterestedUserAccsTimestamp() { - return interestedUserAccsTimestamp; - } - public void setInterestedUserAccsTimestamp(long[] timestamp) { - this.interestedUserAccsTimestamp = timestamp; - } - public boolean isIsaPost() { - return isaPost; - } - public void setIsaPost(boolean isaPost) { - this.isaPost = isaPost; - } - - public void printPostStream(){ - System.out.println("postId : " + postId ); 
- System.out.println("title : " + title ); - System.out.println("content : " + content ); - System.out.println("createdDate : " + createdDate ); - System.out.println("authorId : " + authorId ); - System.out.println("forumId : " + forumId ); - System.out.println("articleIdx : " + articleIdx ); - System.out.println("interestIdx : " + interestIdx ); - System.out.println("tags : " + tags.size() ); - System.out.println("interestedUserAccs : " + interestedUserAccs.length ); - System.out.println("interestedUserAccsTimestamp : " + interestedUserAccsTimestamp.length ); - System.out.println("ipAddress : " + ipAddress.toString() ); - System.out.println("userAgent : " + userAgent ); - System.out.println("browserIdx : " + browserIdx ); - System.out.println("commentId : " + commentId ); - System.out.println("reply_of : " + reply_of ); - } - -} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java index b331fcd1b..96832873f 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java @@ -42,7 +42,7 @@ import java.io.Serializable; import java.util.Arrays; import java.util.GregorianCalendar; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Iterator; import org.apache.hadoop.io.Writable; @@ -61,7 +61,7 @@ public class ReducedUserProfile implements Serializable, Writable{ Friend friendList[]; - TreeSet friendIds; // Use a hashset for checking the existence + HashSet friendIds; // Use a hashset for checking the existence int dicElementIds[]; // Id of an element in a dictionary, e.g., locationId @@ -82,7 +82,7 @@ public class ReducedUserProfile implements Serializable, Writable{ int cityIdx; int forumWallId; int forumStatusId; - TreeSet setOfTags; + HashSet setOfTags; short popularPlaceIds[]; byte 
numPopularPlace; @@ -122,7 +122,7 @@ private void readObject(java.io.ObjectInputStream stream) numPassFriendsAdded[i] = stream.readShort(); } friendList = new Friend[numFriends]; - friendIds = new TreeSet(); + friendIds = new HashSet(numFriends); for (int i = 0; i < numFriendsAdded; i++){ Friend fr = new Friend(); fr.readFields(stream); @@ -143,11 +143,9 @@ private void readObject(java.io.ObjectInputStream stream) browserIdx = stream.readByte(); isFrequentChange = stream.readBoolean(); - short ip1 = stream.readShort(); - short ip2 = stream.readShort(); - short ip3 = stream.readShort(); - short ip4 = stream.readShort(); - ipAddress = new IP(ip1, ip2, ip3, ip4); + int ip = stream.readInt(); + int mask = stream.readInt(); + ipAddress = new IP(ip, mask); locationIdx = stream.readInt(); cityIdx = stream.readInt(); @@ -155,7 +153,7 @@ private void readObject(java.io.ObjectInputStream stream) forumStatusId = stream.readInt(); byte numOfTags = stream.readByte(); - setOfTags = new TreeSet(); + setOfTags = new HashSet(numOfTags); for (byte i = 0; i < numOfTags;i++){ setOfTags.add(stream.readInt()); } @@ -204,10 +202,8 @@ private void writeObject(java.io.ObjectOutputStream stream) stream.writeByte(browserIdx); stream.writeBoolean(isFrequentChange); - stream.writeShort(ipAddress.getIp1()); - stream.writeShort(ipAddress.getIp2()); - stream.writeShort(ipAddress.getIp3()); - stream.writeShort(ipAddress.getIp4()); + stream.writeInt(ipAddress.getIp()); + stream.writeInt(ipAddress.getMask()); stream.writeInt(locationIdx); @@ -231,8 +227,7 @@ private void writeObject(java.io.ObjectOutputStream stream) stream.writeByte(gender); stream.writeLong(birthDay); } - - @Override + public void readFields(DataInput arg0) throws IOException { accountId = arg0.readInt(); createdDate = arg0.readLong(); @@ -248,7 +243,7 @@ public void readFields(DataInput arg0) throws IOException { numPassFriendsAdded[i] = arg0.readShort(); } friendList = new Friend[numFriends]; - friendIds = new TreeSet(); + 
friendIds = new HashSet(numFriends); for (int i = 0; i < numFriendsAdded; i++){ Friend fr = new Friend(); fr.readFields(arg0); @@ -269,11 +264,9 @@ public void readFields(DataInput arg0) throws IOException { browserIdx = arg0.readByte(); isFrequentChange = arg0.readBoolean(); - short ip1 = arg0.readShort(); - short ip2 = arg0.readShort(); - short ip3 = arg0.readShort(); - short ip4 = arg0.readShort(); - ipAddress = new IP(ip1, ip2, ip3, ip4); + int ip = arg0.readInt(); + int mask = arg0.readInt(); + ipAddress = new IP(ip, mask); locationIdx = arg0.readInt(); cityIdx = arg0.readInt(); @@ -281,7 +274,7 @@ public void readFields(DataInput arg0) throws IOException { forumStatusId = arg0.readInt(); byte numTags = arg0.readByte(); - setOfTags = new TreeSet(); + setOfTags = new HashSet(numTags); for (byte i = 0; i < numTags;i++){ setOfTags.add(arg0.readInt()); } @@ -336,7 +329,6 @@ public void copyFields(ReducedUserProfile user){ birthDay = user.getBirthDay(); } - @Override public void write(DataOutput arg0) throws IOException { arg0.writeInt(accountId); arg0.writeLong(createdDate); @@ -369,10 +361,8 @@ public void write(DataOutput arg0) throws IOException { arg0.writeByte(browserIdx); arg0.writeBoolean(isFrequentChange); - arg0.writeShort(ipAddress.getIp1()); - arg0.writeShort(ipAddress.getIp2()); - arg0.writeShort(ipAddress.getIp3()); - arg0.writeShort(ipAddress.getIp4()); + arg0.writeInt(ipAddress.getIp()); + arg0.writeInt(ipAddress.getMask()); arg0.writeInt(locationIdx); @@ -506,7 +496,7 @@ public void setNumFriendsAdded(short numFriendsAdded) { public void allocateFriendListMemory(){ friendList = new Friend[numFriends]; - friendIds = new TreeSet(); + friendIds = new HashSet(numFriends); } public Friend[] getFriendList() { @@ -607,10 +597,10 @@ public int getForumStatusId() { public void setForumStatusId(int forumStatusId) { this.forumStatusId = forumStatusId; } - public TreeSet getSetOfTags() { + public HashSet getSetOfTags() { return setOfTags; } - public void 
setSetOfTags(TreeSet setOfTags) { + public void setSetOfTags(HashSet setOfTags) { this.setOfTags = setOfTags; } public byte getNumPopularPlace() { @@ -645,7 +635,7 @@ public short[] getNumPassFriendsAdded() { public void setNumPassFriendsAdded(short[] numPassFriendsAdded) { this.numPassFriendsAdded = numPassFriendsAdded; } - public TreeSet getFriendIds() { + public HashSet getFriendIds() { return friendIds; } public int[] getDicElementIds() { @@ -673,4 +663,4 @@ public long getBirthDay() { public void setBirthDay(long birthDay) { this.birthDay = birthDay; } -} +} \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java index 9fe87a085..b5ecfc4ca 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java @@ -37,14 +37,14 @@ package ldbc.socialnet.dbgen.objects; import java.util.HashMap; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Set; import java.util.Vector; public class UserExtraInfo { String gender; //long dateOfBirth; - TreeSet email; + HashSet email; Vector languages; int nativeLanguage; //String sourceIp; // Source IP address @@ -65,7 +65,7 @@ public class UserExtraInfo { public UserExtraInfo() { - email = new TreeSet(); + email = new HashSet(); companies = new HashMap(); } @@ -90,7 +90,7 @@ public String getGender() { public void setGender(String gender) { this.gender = gender; } - public TreeSet getEmail() { + public HashSet getEmail() { return email; } public void addEmail(String email) { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java index e1741ea03..896055381 100644 --- 
a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java @@ -37,7 +37,7 @@ package ldbc.socialnet.dbgen.objects; import java.io.Serializable; -import java.util.TreeSet; +import java.util.HashSet; import java.util.Iterator; public class UserProfile implements Serializable { @@ -65,9 +65,9 @@ public class UserProfile implements Serializable { public short numFriendsAdded; Friend friendList[]; - TreeSet friendIds; // Use a hashset for checking the existence + HashSet friendIds; // Use a hashset for checking the existence - TreeSet setOfTags; + HashSet setOfTags; int mainTagId; //For user's agent information @@ -95,7 +95,7 @@ public UserProfile(int accountId) { forumWallId = -1; forumStatusId = -1; - setOfTags = new TreeSet(); + setOfTags = new HashSet(); } public byte getGender() { @@ -177,7 +177,7 @@ public void printDetail(){ } public void printTags(){ System.out.println("Set of tag for " + accountId); - Iterator it = setOfTags.iterator(); + Iterator it = setOfTags.iterator(); while (it.hasNext()){ System.out.print(" " + it.next()); } @@ -189,7 +189,7 @@ public short getNumPassFriends(int pass) { public void setNumPassFriends(short numPassFriends, int pass) { this.numPassFriends[pass] = numPassFriends; } - public TreeSet getSetOfTags() { + public HashSet getSetOfTags() { return setOfTags; } public int getFirstTagIdx(){ @@ -199,13 +199,13 @@ public int getFirstTagIdx(){ return tagIdx; } - public void setSetOfTags(TreeSet setOfTags) { + public void setSetOfTags(HashSet setOfTags) { this.setOfTags = setOfTags; } public void allocateFriendListMemory(int numFriendPasses){ friendList = new Friend[numFriends]; - friendIds = new TreeSet(); + friendIds = new HashSet(numFriends); numPassFriends = new short[numFriendPasses]; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/CSV.java 
b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/CSV.java index 745c055ee..0389364cd 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/CSV.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/CSV.java @@ -44,6 +44,8 @@ import java.util.Iterator; import java.util.Vector; +import ldbc.socialnet.dbgen.dictionary.BrowserDictionary; +import ldbc.socialnet.dbgen.dictionary.CompanyDictionary; import ldbc.socialnet.dbgen.dictionary.IPAddressDictionary; import ldbc.socialnet.dbgen.dictionary.LanguageDictionary; import ldbc.socialnet.dbgen.dictionary.LocationDictionary; @@ -63,13 +65,52 @@ import ldbc.socialnet.dbgen.vocabulary.DBPOWL; import ldbc.socialnet.dbgen.vocabulary.SN; - +/** + * CSV serializer. + */ public class CSV implements Serializer { - final String NEWLINE = "\n"; - final String SEPARATOR = "|"; - - final String[] fileNames = { + private FileWriter[][] dataFileWriter; + private int[] currentWriter; + + private long csvRows; + private GregorianCalendar date; + + /** + * Generator input classes. + */ + private CompanyDictionary companyDic; + private HashMap universityToCountry; + private BrowserDictionary browserDic; + private LocationDictionary locationDic; + private LanguageDictionary languageDic; + private TagDictionary tagDic; + private IPAddressDictionary ipDic; + + /** + * Used to create an unique id to each file. It is used only in case of an unnumbered entity or in the relations. + */ + private long[] idList; + + /** + * Used to avoid serialize more than once the same data. + */ + HashMap printedTagClasses; + Vector locations; + Vector serializedLanguages; + Vector organisations; + Vector interests; + Vector tagList; + Vector ipList; + + private final String NEWLINE = "\n"; + private final String SEPARATOR = "|"; + + /** + * The fileName vector and the enum Files serves the purpose of facilitate the serialization method + * in the gatherData methods. 
Both of them must be coherent or it won't work. + */ + private final String[] fileNames = { "tag", "post", "forum", @@ -138,7 +179,10 @@ enum Files { NUM_FILES } - final String[][] fieldNames = { + /** + * The field names of the CSV files. They are the first thing written in their respective file. + */ + private final String[][] fieldNames = { {"id", "name", "url"}, {"id", "imageFile", "creationDate", "locationIP", "browserUsed", "language", "content"}, {"id", "title", "creationDate"}, @@ -173,40 +217,34 @@ enum Files { {"Forum.id", "Person.id", "joinDate"} }; - private long nrTriples; - private FileWriter[][] dataFileWriter; - int[] currentWriter; - long[] idList; - static long membershipId = 0; - static long friendshipId = 0; - static long gpsId = 0; - static long emailId = 0; - static long ipId = 0; - - HashMap printedTagClasses; - - HashMap companyToCountry; - HashMap universityToCountry; - Vector vBrowserNames; - Vector locations; - Vector serializedLanguages; - Vector organisations; - Vector interests; - Vector tagList; - Vector ipList; - - GregorianCalendar date; - LocationDictionary locationDic; - LanguageDictionary languageDic; - TagDictionary tagDic; - IPAddressDictionary ipDic; - - public CSV(String file, boolean forwardChaining) { - this(file, forwardChaining, 1); - } - - public CSV(String file, boolean forwardChaining, int nrOfOutputFiles) { - vBrowserNames = new Vector(); + /** + * Constructor. + * + * @param file: The basic file name. + * @param nrOfOutputFiles: How many files will be created. + * @param tagDic: The tag dictionary used in the generation. + * @param browsers: The browser dictionary used in the generation. + * @param companies: The company dictionary used in the generation. + * @param univesityToCountry: HashMap of university names to country IDs. + * @param ipDic: The IP dictionary used in the generation. + * @param locationDic: The location dictionary used in the generation.
+ * @param languageDic: The language dictionary used in the generation. + */ + public CSV(String file, int nrOfOutputFiles, + TagDictionary tagDic, BrowserDictionary browsers, + CompanyDictionary companies, HashMap univesityToCountry, + IPAddressDictionary ipDic, LocationDictionary locationDic, LanguageDictionary languageDic) { + + this.tagDic = tagDic; + this.browserDic = browsers; + this.locationDic = locationDic; + this.languageDic = languageDic; + this.companyDic = companies; + this.universityToCountry = univesityToCountry; + this.ipDic = ipDic; + + csvRows = 0l; + date = new GregorianCalendar(); locations = new Vector(); organisations = new Vector(); interests = new Vector(); @@ -221,7 +259,7 @@ public CSV(String file, boolean forwardChaining, int nrOfOutputFiles) { idList[i] = 0; currentWriter[i] = 0; } - date = new GregorianCalendar(); + int nrOfDigits = ((int)Math.log10(nrOfOutputFiles)) + 1; String formatString = "%0" + nrOfDigits + "d"; try{ @@ -238,7 +276,7 @@ public CSV(String file, boolean forwardChaining, int nrOfOutputFiles) { } } - for(int i=0;i arguments = new Vector(); for (int k = 0; k < fieldNames[j].length; k++) { @@ -249,32 +287,59 @@ public CSV(String file, boolean forwardChaining, int nrOfOutputFiles) { } } catch(IOException e){ - System.err.println("Could not open File for writing."); System.err.println(e.getMessage()); System.exit(-1); } - nrTriples = 0l; } - public CSV(String file, boolean forwardChaining, int nrOfOutputFiles, - TagDictionary tagDic, Vector _vBrowsers, - HashMap companyToCountry, HashMap univesityToCountry, - IPAddressDictionary ipDic, LocationDictionary locationDic, LanguageDictionary languageDic) { - - this(file, forwardChaining, nrOfOutputFiles); - this.tagDic = tagDic; - this.vBrowserNames = _vBrowsers; - this.locationDic = locationDic; - this.languageDic = languageDic; - this.companyToCountry = companyToCountry; - this.universityToCountry = univesityToCountry; - this.ipDic = ipDic; - } - - public Long 
triplesGenerated() { - return nrTriples; + /** + * Returns the number of CSV rows written. + */ + public Long unitsGenerated() { + return csvRows; } + /** + * Writes the data into the appropriate file. + * + * @param columns: The column data. + * @param index: The file index. + */ + public void ToCSV(Vector columns, int index) { + StringBuffer result = new StringBuffer(); + result.append(columns.get(0)); + for (int i = 1; i < columns.size(); i++) { + result.append(SEPARATOR); + result.append(columns.get(i)); + } + result.append(NEWLINE); + WriteTo(result.toString(), index); + columns.clear(); + idList[index]++; + } + + /** + * Writes the data into the appropriate file. + * + * @param data: The string data. + * @param index: The file index. + */ + public void WriteTo(String data, int index) { + try { + dataFileWriter[currentWriter[index]][index].append(data); + currentWriter[index] = (currentWriter[index] + 1) % dataFileWriter.length; + csvRows++; + } catch (IOException e) { + System.out.println("Cannot write to output file "); + e.printStackTrace(); + } + } + + /** + * Serializes the tag data and its class hierarchy. + * + * @param tagId: The tag id.
+ */ public void printTagHierarchy(Integer tagId) { Vector arguments = new Vector(); Integer tagClass = tagDic.getTagClass(tagId); @@ -304,29 +369,11 @@ public void printTagHierarchy(Integer tagId) { } } - public void ToCSV(Vector arguments, int index) { - StringBuffer result = new StringBuffer(); - result.append(arguments.get(0)); - for (int i = 1; i < arguments.size(); i++) { - result.append(SEPARATOR); - result.append(arguments.get(i)); - } - result.append(NEWLINE); - WriteTo(result.toString(), index); - arguments.clear(); - idList[index]++; - } - - public void WriteTo(String data, int index) { - try { - dataFileWriter[currentWriter[index]][index].append(data); - currentWriter[index] = (currentWriter[index] + 1) % dataFileWriter.length; - } catch (IOException e) { - System.out.println("Cannot write to output file "); - e.printStackTrace(); - } - } - + /** + * Writes the base location and its hierarchy. + * + * @param baseId: The base location id. + */ public void printLocationHierarchy(int baseId) { Vector arguments = new Vector(); @@ -339,7 +386,6 @@ public void printLocationHierarchy(int baseId) { for (int i = areas.size() - 1; i >= 0; i--) { if (locations.indexOf(areas.get(i)) == -1) { locations.add(areas.get(i)); - //print location arguments.add(Integer.toString(areas.get(i))); arguments.add(locationDic.getLocationName(areas.get(i))); arguments.add(DBP.getUrl(locationDic.getLocationName(areas.get(i)))); @@ -355,6 +401,9 @@ public void printLocationHierarchy(int baseId) { } } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(ReducedUserProfile, UserExtraInfo)} + */ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ Vector arguments = new Vector(); @@ -367,7 +416,7 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ Iterator itString = extraInfo.getCompanies().iterator(); while (itString.hasNext()) { String company = itString.next(); - int parentId = companyToCountry.get(company); 
+ int parentId = companyDic.getCountry(company); printLocationHierarchy(parentId); } printLocationHierarchy(universityToCountry.get(extraInfo.getOrganization())); @@ -396,7 +445,7 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ arguments.add(empty); } if (profile.getBrowserIdx() >= 0) { - arguments.add(vBrowserNames.get(profile.getBrowserIdx())); + arguments.add(browserDic.getName(profile.getBrowserIdx())); } else { String empty = ""; arguments.add(empty); @@ -439,16 +488,16 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ arguments.add(Integer.toString(universityToCountry.get(extraInfo.getOrganization()))); ToCSV(arguments, Files.ORGANISATION_BASED_NEAR_PLACE.ordinal()); } - } - - if (extraInfo.getClassYear() != -1 ) { - date.setTimeInMillis(extraInfo.getClassYear()); - dateString = DateGenerator.formatYear(date); - - arguments.add(Integer.toString(profile.getAccountId())); - arguments.add(SN.formId(organisationId)); - arguments.add(dateString); - ToCSV(arguments, Files.PERSON_STUDY_AT_ORGANISATION.ordinal()); + + if (extraInfo.getClassYear() != -1 ) { + date.setTimeInMillis(extraInfo.getClassYear()); + dateString = DateGenerator.formatYear(date); + + arguments.add(Integer.toString(profile.getAccountId())); + arguments.add(SN.formId(organisationId)); + arguments.add(dateString); + ToCSV(arguments, Files.PERSON_STUDY_AT_ORGANISATION.ordinal()); + } } itString = extraInfo.getCompanies().iterator(); @@ -466,7 +515,7 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ ToCSV(arguments, Files.ORGANISATION.ordinal()); arguments.add(SN.formId(organisationId)); - arguments.add(Integer.toString(companyToCountry.get(company))); + arguments.add(Integer.toString(companyDic.getCountry(company))); ToCSV(arguments, Files.ORGANISATION_BASED_NEAR_PLACE.ordinal()); } date.setTimeInMillis(extraInfo.getWorkFrom(company)); @@ -499,8 +548,8 @@ public void gatherData(ReducedUserProfile profile, 
UserExtraInfo extraInfo){ ToCSV(arguments, Files.PERSON_INTEREST_TAG.ordinal()); } - Friend friends[] = profile.getFriendList(); - for (int i = 0; i < friends.length; i ++) { + Friend friends[] = profile.getFriendList(); + for (int i = 0; i < friends.length; i++) { if (friends[i] != null && friends[i].getCreatedTime() != -1){ arguments.add(Integer.toString(profile.getAccountId())); @@ -544,10 +593,17 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ } } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Post)} + */ public void gatherData(Post post){ Vector arguments = new Vector(); String empty = ""; + if (post.getIpAddress() != null) { + printLocationHierarchy(ipDic.getLocation(post.getIpAddress())); + } + arguments.add(SN.formId(post.getPostId())); arguments.add(empty); date.setTimeInMillis(post.getCreatedDate()); @@ -559,7 +615,7 @@ public void gatherData(Post post){ arguments.add(empty); } if (post.getBrowserIdx() != -1){ - arguments.add(vBrowserNames.get(post.getBrowserIdx())); + arguments.add(browserDic.getName(post.getBrowserIdx())); } else { arguments.add(empty); } @@ -616,9 +672,16 @@ public void gatherData(Post post){ } } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Comment)} + */ public void gatherData(Comment comment){ Vector arguments = new Vector(); + if (comment.getIpAddress() != null) { + printLocationHierarchy(ipDic.getLocation(comment.getIpAddress())); + } + date.setTimeInMillis(comment.getCreateDate()); String dateString = DateGenerator.formatDateDetail(date); arguments.add(SN.formId(comment.getCommentId())); @@ -630,7 +693,7 @@ public void gatherData(Comment comment){ arguments.add(empty); } if (comment.getBrowserIdx() != -1){ - arguments.add(vBrowserNames.get(comment.getBrowserIdx())); + arguments.add(browserDic.getName(comment.getBrowserIdx())); } else { String empty = ""; arguments.add(empty); @@ -658,8 +721,15 @@ public void gatherData(Comment comment){ 
ToCSV(arguments, Files.COMMENT_HAS_CREATOR_PERSON.ordinal()); } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Photo)} + */ public void gatherData(Photo photo){ Vector arguments = new Vector(); + + if (photo.getIpAddress() != null) { + printLocationHierarchy(ipDic.getLocation(photo.getIpAddress())); + } String empty = ""; arguments.add(SN.formId(photo.getPhotoId())); @@ -673,7 +743,7 @@ public void gatherData(Photo photo){ arguments.add(empty); } if (photo.getBrowserIdx() != -1){ - arguments.add(vBrowserNames.get(photo.getBrowserIdx())); + arguments.add(browserDic.getName(photo.getBrowserIdx())); } else { arguments.add(empty); } @@ -726,6 +796,9 @@ public void gatherData(Photo photo){ } } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Group)} + */ public void gatherData(Group group) { Vector arguments = new Vector(); @@ -774,7 +847,10 @@ public void gatherData(Group group) { } } - public void serialize() { + /** + * Ends the serialization. + */ + public void close() { try { for (int i = 0; i < dataFileWriter.length; i++) { for (int j = 0; j < Files.NUM_FILES.ordinal(); j++) { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Serializer.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Serializer.java index aa1f3b545..cc5cc7465 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Serializer.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Serializer.java @@ -43,20 +43,70 @@ import ldbc.socialnet.dbgen.objects.ReducedUserProfile; import ldbc.socialnet.dbgen.objects.UserExtraInfo; - +/** + * The ldbc socialnet generator serialize interface. The user of this interface has control of how the entities + * are parsed with the gatherData methods. 
+ * + * To ensure the correct serialization the close method must be called at the end so the serializer is able + * to flush any unwritten data and close the files used. + */ public interface Serializer { - public void serialize(); + /** + * Closes the serializer and flushes the remaining data. Once called, any gatherData method will produce I/O exceptions. + */ + public void close(); - public Long triplesGenerated(); + /** + * Returns how many serializer units (e.g. rows in CSV or triples in RDF) have been generated. + */ + public Long unitsGenerated(); + /** + * Serializes the user information. + * Aside from the user itself this includes: + * · The location hierarchy of the user location and any university and company he has worked with. + * · The company and university data. + * · The forum of this user's wall. + * · The tag data from its interests. + * + * @param user: The user. + * @param extraInfo: The user cosmetic data. + */ public void gatherData(ReducedUserProfile user, UserExtraInfo extraInfo); + /** + * Serializes the post information. + * Aside from the post itself this includes: + * · The location hierarchy of its location (via IP). + * · Its tag data. + * + * @param post: The post. + */ public void gatherData(Post post); + /** + * Serializes the photo information. + * Aside from the photo itself this includes: + * · The location hierarchy of its location (via IP). + * · Its tag data. + * @param photo: The photo. + */ public void gatherData(Photo photo); + /** + * Serializes the comment information. + * Aside from the comment itself this includes: + * · The location hierarchy of its location (via IP). + * · Its tag data. + * @param comment: The comment. + */ public void gatherData(Comment comment); + /** + * Serializes the group information. + * + * @param group: The group.
+ */ public void gatherData(Group group); } \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Turtle.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Turtle.java index 5219bf130..a3e0c37a9 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Turtle.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Turtle.java @@ -44,6 +44,8 @@ import java.util.Iterator; import java.util.Vector; +import ldbc.socialnet.dbgen.dictionary.BrowserDictionary; +import ldbc.socialnet.dbgen.dictionary.CompanyDictionary; import ldbc.socialnet.dbgen.dictionary.IPAddressDictionary; import ldbc.socialnet.dbgen.dictionary.LanguageDictionary; import ldbc.socialnet.dbgen.dictionary.LocationDictionary; @@ -60,7 +62,6 @@ import ldbc.socialnet.dbgen.objects.UserExtraInfo; import ldbc.socialnet.dbgen.vocabulary.DBP; import ldbc.socialnet.dbgen.vocabulary.DBPOWL; -import ldbc.socialnet.dbgen.vocabulary.DBPPROP; import ldbc.socialnet.dbgen.vocabulary.FOAF; import ldbc.socialnet.dbgen.vocabulary.RDF; import ldbc.socialnet.dbgen.vocabulary.RDFS; @@ -68,50 +69,84 @@ import ldbc.socialnet.dbgen.vocabulary.SNVOC; import ldbc.socialnet.dbgen.vocabulary.XSD; - +/** + * Turtle serializer. 
+ */ public class Turtle implements Serializer { private static final String STATIC_DBP_DATA_FILE = "static_dbp"; - private boolean isTurtle; - private long nrTriples; private FileWriter[] dataFileWriter; private FileWriter[] staticdbpFileWriter; - int currentWriter = 0; - static long membershipId = 0; - static long friendshipId = 0; - static long gpsId = 0; - static long likeId = 0; - static long workatId = 0; - static long studyAt = 0; - static long locationPartOfId = 0; - static long speakId = 0; + private int currentWriter = 0; - HashMap companyToCountry; - HashMap universityToCountry; - HashMap printedIpaddresses; - HashMap printedOrganizations; - HashMap printedLanguages; - HashMap printedTags; - HashMap printedTagClasses; - HashMap printedLocations; - Vector vBrowserNames; + private long nrTriples; + private GregorianCalendar date; + + /** + * Generator input classes. + */ + private boolean isTurtle; + private CompanyDictionary companyDic; + private HashMap universityToCountry; + private BrowserDictionary browserDic; + private LocationDictionary locationDic; + private LanguageDictionary languageDic; + private TagDictionary tagDic; + private IPAddressDictionary ipDic; + + /** + * Used to give an unique ID to blank nodes. + */ + private long membershipId = 0; + private long likeId = 0; + private long workAtId = 0; + private long studyAtId = 0; - GregorianCalendar date; - LocationDictionary locationDic; - LanguageDictionary languageDic; - TagDictionary tagDic; - IPAddressDictionary ipDic; + /** + * Used to avoid serialize more than once the same data. + */ + private HashMap printedOrganizations; + private HashMap printedTags; + private HashMap printedTagClasses; + private HashMap printedLocations; - public Turtle(String file, boolean forwardChaining, int nrOfOutputFiles, boolean isTurtle) { + /** + * Constructor. + * + * @param file: The basic file name. + * @param nrOfOutputFiles: How many files will be created. 
+ * @param isTurtle: If the RDF admits turtle abbreviation syntax. + * @param tagDic: The tag dictionary used in the generation. + * @param browsers: The browser dictionary used in the generation. + * @param companies: The company dictionary used in the generation. + * @param univesityToCountry: HashMap of university names to country IDs. + * @param ipDic: The IP dictionary used in the generation. + * @param locationDic: The location dictionary used in the generation. + * @param languageDic: The language dictionary used in the generation. + */ + public Turtle(String file, int nrOfOutputFiles, boolean isTurtle, + TagDictionary tagDic, BrowserDictionary browsers, + CompanyDictionary companies, HashMap univesityToCountry, + IPAddressDictionary ipDic, LocationDictionary locationDic, + LanguageDictionary languageDic) { + this.isTurtle = isTurtle; + this.tagDic = tagDic; + this.browserDic = browsers; + this.locationDic = locationDic; + this.companyDic = companies; + this.universityToCountry = univesityToCountry; + this.ipDic = ipDic; + this.languageDic = languageDic; + + nrTriples = 0l; date = new GregorianCalendar(); - printedIpaddresses = new HashMap(); printedOrganizations = new HashMap(); - printedLanguages = new HashMap(); printedTags = new HashMap(); printedTagClasses = new HashMap(); printedLocations = new HashMap(); + int nrOfDigits = ((int)Math.log10(nrOfOutputFiles)) + 1; String formatString = "%0" + nrOfDigits + "d"; try{ @@ -127,46 +162,30 @@ public Turtle(String file, boolean forwardChaining, int nrOfOutputFiles, boolean this.staticdbpFileWriter[i-1] = new FileWriter(file+STATIC_DBP_DATA_FILE + String.format(formatString, i) + extension); } } - - } catch(IOException e) { - System.err.println("Could not open File for writing."); - System.err.println(e.getMessage()); - System.exit(-1); - } - - try { - for(int i=0;i _vBrowsers, - HashMap companyToCountry, HashMap univesityToCountry, - IPAddressDictionary ipDic, LocationDictionary locationDic, -
LanguageDictionary languageDic) { - this(file, forwardChaining, nrOfOutputFiles, isTurtle); - this.tagDic = tagDic; - this.vBrowserNames = _vBrowsers; - this.locationDic = locationDic; - this.companyToCountry = companyToCountry; - this.universityToCountry = univesityToCountry; - this.ipDic = ipDic; - this.languageDic = languageDic; - } - - public Long triplesGenerated() { + /** + * Returns how many triples have been generated. + */ + public Long unitsGenerated() { return nrTriples; } + /** + * Writes the collected data to the appropriate file. + * + * @param data: The string to write. + */ public void toWriter(String data){ try { dataFileWriter[currentWriter].append(data); @@ -178,49 +197,61 @@ public void toWriter(String data){ } } + /** + * Gets the namespace for the generator file. + */ private String getNamespaces() { - StringBuffer result = new StringBuffer(); - createPrefixLine(result, RDF.PREFIX, RDF.NS); - createPrefixLine(result, RDFS.PREFIX, RDFS.NS); - createPrefixLine(result, XSD.PREFIX, XSD.NS); - createPrefixLine(result, SNVOC.PREFIX, SNVOC.NS); - createPrefixLine(result, SN.PREFIX, SN.NS); - createPrefixLine(result, DBP.PREFIX, DBP.NS); + StringBuffer result = new StringBuffer(350); + createPrefixLine(result, RDF.PREFIX, RDF.NAMESPACE); + createPrefixLine(result, RDFS.PREFIX, RDFS.NAMESPACE); + createPrefixLine(result, XSD.PREFIX, XSD.NAMESPACE); + createPrefixLine(result, SNVOC.PREFIX, SNVOC.NAMESPACE); + createPrefixLine(result, SN.PREFIX, SN.NAMESPACE); + createPrefixLine(result, DBP.PREFIX, DBP.NAMESPACE); return result.toString(); } + /** + * Gets the namespace for the static dbpedia file. 
+ */ private String getStaticNamespaces() { - StringBuffer result = new StringBuffer(); - createPrefixLine(result, RDF.PREFIX, RDF.NS); - createPrefixLine(result, RDFS.PREFIX, RDFS.NS); - createPrefixLine(result, FOAF.PREFIX, FOAF.NS); - createPrefixLine(result, DBP.PREFIX, DBP.NS); - createPrefixLine(result, DBPOWL.PREFIX, DBPOWL.NS); - createPrefixLine(result, DBPPROP.PREFIX, DBPPROP.NS); - createPrefixLine(result, SNVOC.PREFIX, SNVOC.NS); + StringBuffer result = new StringBuffer(400); + createPrefixLine(result, RDF.PREFIX, RDF.NAMESPACE); + createPrefixLine(result, RDFS.PREFIX, RDFS.NAMESPACE); + createPrefixLine(result, FOAF.PREFIX, FOAF.NAMESPACE); + createPrefixLine(result, DBP.PREFIX, DBP.NAMESPACE); + createPrefixLine(result, DBPOWL.PREFIX, DBPOWL.NAMESPACE); + createPrefixLine(result, SNVOC.PREFIX, SNVOC.NAMESPACE); return result.toString(); } + /** + * + * @param result: The StringBuffer to append to. + * @param prefix: The RDF namespace prefix abbreviation. + * @param namespace: The RDF namespace. + */ private void createPrefixLine(StringBuffer result, String prefix, String namespace) { result.append("@prefix "); result.append(prefix); result.append(" "); - result.append(createURIref(namespace)); - result.append(" .\n"); - } - - private String createURIref(String uri) { - StringBuffer result = new StringBuffer(); result.append("<"); - result.append(uri); + result.append(namespace); result.append(">"); - return result.toString(); + result.append(" .\n"); } - private void writeDBPData(String left, String middle, String right) { + /** + * Writes a RDF triple in the dbpedia static data file. + * + * @param subject: The RDF subject. + * @param predicate: The RDF predicate. + * @param object: The RDF object. 
+ */ + private void writeDBPData(String subject, String predicate, String object) { try { - StringBuffer result = new StringBuffer(); - createTripleSPO(result, left, middle, right); + StringBuffer result = new StringBuffer(150); + createTripleSPO(result, subject, predicate, object); staticdbpFileWriter[currentWriter].append(result); } catch (IOException e) { System.out.println("Cannot write to output file "); @@ -228,6 +259,13 @@ private void writeDBPData(String left, String middle, String right) { } } + /** + * Serializes the tag data and its class hierarchy. + * + * @param result: The StringBuffer to append to. + * @param tagId: The tag id. + * @param tag: The tag name. + */ private void writeTagData(StringBuffer result, Integer tagId, String tag) { String description = tagDic.getDescription(tagId); writeDBPData(DBP.fullPrefixed(tag), FOAF.Name, createLiteral(description.replace("\"", "\\\""))); @@ -256,36 +294,60 @@ private void writeTagData(StringBuffer result, Integer tagId, String tag) { } } + /** + * Adds the appropriate triple kind into the input StringBuffer. + * + * @param result: The StringBuffer to append to. + * @param beginning: The beginning of a subject abbreviation block. + * @param end: The end of a subject abbreviation block. + * @param subject: The RDF subject. + * @param predicate: The RDF predicate. + * @param object1: The RDF first object. + * @param object2: The RDF second object.
+ */ private void AddTriple(StringBuffer result, boolean beginning, - boolean end, String left, String middle, String right, String extra, boolean ignoreExtra) { - if (isTurtle && beginning) { - result.append(left + "\n"); - } - if (isTurtle && end){ - if (extra.isEmpty()) { - result.append(createTriplePOEnd(middle, right)); - } else { - result.append(createTriplePOOEnd(middle, right, extra)); - } - } else if (isTurtle) { - if (extra.isEmpty()) { - result.append(createTriplePO(middle, right)); - } else { - result.append(createTriplePOO(middle, right, extra)); + boolean end, String subject, String predicate, String object1, String object2) { + + if (isTurtle) { + + if (beginning) { + result.append(subject + "\n"); + } + + if (object2.isEmpty()) { + createTriplePO(result, predicate, object1, end); + } else { + createTriplePOO(result, predicate, object1, object2, end); } - } else { - createTripleSPO(result, left, middle, right); - if (!extra.isEmpty() && !ignoreExtra) { - createTripleSPO(result, left, middle, extra); + } else { + createTripleSPO(result, subject, predicate, object1); + if (!object2.isEmpty()) { + createTripleSPO(result, subject, predicate, object2); } } } + /** + * Adds the appropriate triple kind into the input StringBuffer. + * + * @param result: The StringBuffer to append to. + * @param beginning: The beginning of a subject abbreviation block. + * @param end: The end of a subject abbreviation block. + * @param subject: The RDF subject. + * @param predicate: The RDF predicate. + * @param object: The RDF object. + */ private void AddTriple(StringBuffer result, boolean beginning, - boolean end, String left, String middle, String right) { - AddTriple(result, beginning, end, left, middle, right, "", false); + boolean end, String subject, String predicate, String object) { + AddTriple(result, beginning, end, subject, predicate, object, ""); } + /** + * Writes the base location and its hierarchy. + * + * @param result: The StringBuffer to append to.
+ * @param baseId: The base location id. + */ public void printLocationHierarchy(StringBuffer result, int baseId) { ArrayList areas = new ArrayList(); do { @@ -315,8 +377,11 @@ public void printLocationHierarchy(StringBuffer result, int baseId) { } } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(ReducedUserProfile, UserExtraInfo)} + */ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ - StringBuffer result = new StringBuffer(); + StringBuffer result = new StringBuffer(19000); if (extraInfo == null) { System.err.println("LDBC socialnet must serialize the extraInfo"); @@ -327,7 +392,7 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ Iterator itString = extraInfo.getCompanies().iterator(); while (itString.hasNext()) { String company = itString.next(); - int parentId = companyToCountry.get(company); + int parentId = companyDic.getCountry(company); printLocationHierarchy(result, parentId); } printLocationHierarchy(result, universityToCountry.get(extraInfo.getOrganization())); @@ -360,7 +425,7 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ createLiteral(profile.getIpAddress().toString())); if (profile.getBrowserIdx() >= 0) { AddTriple(result, false, false, prefix, SNVOC.browser, - createLiteral(vBrowserNames.get(profile.getBrowserIdx()))); + createLiteral(browserDic.getName(profile.getBrowserIdx()))); } } @@ -382,24 +447,24 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ createTripleSPO(result, DBP.fullPrefixed(extraInfo.getOrganization()), SNVOC.locatedIn, DBP.fullPrefixed(locationDic.getLocationName(locationId))); } - createTripleSPO(result, prefix, SNVOC.studyAt, SN.getStudyAtURI(studyAt)); - createTripleSPO(result, SN.getStudyAtURI(studyAt), SNVOC.hasOrganisation, - DBP.fullPrefixed(extraInfo.getOrganization())); + if (extraInfo.getClassYear() != -1 ){ + createTripleSPO(result, prefix, SNVOC.studyAt, 
SN.getStudyAtURI(studyAtId)); + createTripleSPO(result, SN.getStudyAtURI(studyAtId), SNVOC.hasOrganisation, + DBP.fullPrefixed(extraInfo.getOrganization())); date.setTimeInMillis(extraInfo.getClassYear()); String yearString = DateGenerator.formatYear(date); - createTripleSPO(result, SN.getStudyAtURI(studyAt), SNVOC.classYear, + createTripleSPO(result, SN.getStudyAtURI(studyAtId), SNVOC.classYear, createDataTypeLiteral(yearString, XSD.Integer)); } - studyAt++; + studyAtId++; } Vector languages = extraInfo.getLanguages(); for (int i = 0; i < languages.size(); i++) { createTripleSPO(result, prefix, SNVOC.speaks, createLiteral(languageDic.getLanguagesName(languages.get(i)))); - speakId++; } itString = extraInfo.getCompanies().iterator(); @@ -411,20 +476,20 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ writeDBPData(DBP.fullPrefixed(company), RDF.type, DBPOWL.Organisation); writeDBPData(DBP.fullPrefixed(company), FOAF.Name, createLiteral(company)); - int locationId = companyToCountry.get(company); + int locationId = companyDic.getCountry(company); createTripleSPO(result, DBP.fullPrefixed(company), SNVOC.locatedIn, DBP.fullPrefixed(locationDic.getLocationName(locationId))); } - createTripleSPO(result, prefix, SNVOC.workAt, SN.getWorkAtURI(workatId)); - createTripleSPO(result, SN.getWorkAtURI(workatId), SNVOC.hasOrganisation, + createTripleSPO(result, prefix, SNVOC.workAt, SN.getWorkAtURI(workAtId)); + createTripleSPO(result, SN.getWorkAtURI(workAtId), SNVOC.hasOrganisation, DBP.fullPrefixed(company)); date.setTimeInMillis(extraInfo.getWorkFrom(company)); String yearString = DateGenerator.formatYear(date); - createTripleSPO(result, SN.getWorkAtURI(workatId), SNVOC.workFrom, + createTripleSPO(result, SN.getWorkAtURI(workAtId), SNVOC.workFrom, createDataTypeLiteral(yearString, XSD.Integer)); - workatId++; + workAtId++; } itString = extraInfo.getEmail().iterator(); @@ -446,12 +511,10 @@ public void gatherData(ReducedUserProfile profile, 
UserExtraInfo extraInfo){ } Friend friends[] = profile.getFriendList(); - for (int i = 0; i < friends.length; i ++){ - if (friends[i] != null){ - if (friends[i].getCreatedTime() != -1){ - createTripleSPO(result, prefix, SNVOC.knows, - SN.getPersonURI(friends[i].getFriendAcc())); - } + for (int i = 0; i < friends.length; i++){ + if (friends[i] != null && friends[i].getCreatedTime() != -1){ + createTripleSPO(result, prefix, SNVOC.knows, + SN.getPersonURI(friends[i].getFriendAcc())); } } @@ -493,8 +556,11 @@ public void gatherData(ReducedUserProfile profile, UserExtraInfo extraInfo){ toWriter(result.toString()); } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Post)} + */ public void gatherData(Post post){ - StringBuffer result = new StringBuffer(); + StringBuffer result = new StringBuffer(2500); if (post.getIpAddress() != null) { printLocationHierarchy(result, ipDic.getLocation(post.getIpAddress())); @@ -511,7 +577,7 @@ public void gatherData(Post post){ createLiteral(post.getIpAddress().toString())); if (post.getBrowserIdx() >= 0) { AddTriple(result, false, false, prefix, SNVOC.browser, - createLiteral(vBrowserNames.get(post.getBrowserIdx()))); + createLiteral(browserDic.getName(post.getBrowserIdx()))); } } AddTriple(result, false, true, prefix, SNVOC.content, @@ -562,8 +628,15 @@ public void gatherData(Post post){ toWriter(result.toString()); } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Comment)} + */ public void gatherData(Comment comment){ - StringBuffer result = new StringBuffer(); + StringBuffer result = new StringBuffer(2000); + + if (comment.getIpAddress() != null) { + printLocationHierarchy(result, ipDic.getLocation(comment.getIpAddress())); + } String prefix = SN.getCommentURI(comment.getCommentId()); date.setTimeInMillis(comment.getCreateDate()); @@ -576,7 +649,7 @@ public void gatherData(Comment comment){ createLiteral(comment.getIpAddress().toString())); if (comment.getBrowserIdx() >= 0) { 
AddTriple(result, false, false, prefix, SNVOC.browser, - createLiteral(vBrowserNames.get(comment.getBrowserIdx()))); + createLiteral(browserDic.getName(comment.getBrowserIdx()))); } } AddTriple(result, false, true, prefix, SNVOC.content, createLiteral(comment.getContent())); @@ -595,8 +668,15 @@ public void gatherData(Comment comment){ toWriter(result.toString()); } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Photo)} + */ public void gatherData(Photo photo){ - StringBuffer result = new StringBuffer(); + StringBuffer result = new StringBuffer(2500); + + if (photo.getIpAddress() != null) { + printLocationHierarchy(result, ipDic.getLocation(photo.getIpAddress())); + } String prefix = SN.getPostURI(photo.getPhotoId()); AddTriple(result, true, false, prefix, RDF.type, SNVOC.Post); @@ -608,7 +688,7 @@ public void gatherData(Photo photo){ createLiteral(photo.getIpAddress().toString())); if (photo.getBrowserIdx() >= 0) { AddTriple(result, false, false, prefix, SNVOC.browser, - createLiteral(vBrowserNames.get(photo.getBrowserIdx()))); + createLiteral(browserDic.getName(photo.getBrowserIdx()))); } } AddTriple(result, false, true, prefix, SNVOC.creationDate, @@ -649,12 +729,14 @@ public void gatherData(Photo photo){ toWriter(result.toString()); } + /** + * @See {@link ldbc.socialnet.dbgen.serializer.Serializer#gatherData(Group)} + */ public void gatherData(Group group){ - StringBuffer result = new StringBuffer(); + StringBuffer result = new StringBuffer(12000); date.setTimeInMillis(group.getCreatedDate()); String dateString = DateGenerator.formatDateDetail(date); - // Forums of the group String forumPrefix = SN.getForumURI(group.getForumWallId()); AddTriple(result, true, false, forumPrefix, RDF.type, SNVOC.Forum); AddTriple(result, false, false, forumPrefix, SNVOC.title, createLiteral(group.getGroupName())); @@ -694,25 +776,40 @@ public void gatherData(Group group){ toWriter(result.toString()); } + /** + * Builds a plain RDF literal. 
+ * + * SeeRDF literals. + * + * @param value: The value. + * @return The RDF literal string representation. + */ private String createLiteral(String value) { - StringBuffer result = new StringBuffer(); - result.append("\""); - result.append(value); - result.append("\""); - return result.toString(); + return "\"" + value + "\""; } + /** + * Builds a typed RDF literal. + * + * SeeRDF literals. + * + * @param value: The literal value. + * @param datatypeURI: The data type. + * @return The RDF typed literal string representation. + */ private String createDataTypeLiteral(String value, String datatypeURI) { - StringBuffer result = new StringBuffer(); - result.append("\""); - result.append(value); - result.append("\"^^"); - result.append(datatypeURI); - return result.toString(); + return "\"" + value + "\"^^" + datatypeURI; } - /* - * Create a triple consisting of subject predicate and object, end with "." + /** + * Builds a simple turtle triple: subject predicate object . + * + * See Turtle + * + * @param result: The stringBuffer where the triple representation will be appended to. + * @param subject: The RDF subject. + * @param predicate: The RDF predicate. + * @param object: The RDF object. */ private void createTripleSPO(StringBuffer result, String subject, String predicate, String object) { result.append(subject); @@ -724,71 +821,62 @@ private void createTripleSPO(StringBuffer result, String subject, String predica nrTriples++; } - /* - * Create an abbreviated triple consisting of predicate and object; end with ";" - */ - private String createTriplePO(String predicate, String object) { - StringBuffer result = new StringBuffer(); - result.append(" "); - result.append(predicate); - result.append(" "); - result.append(object); - result.append(" ;\n"); - - nrTriples++; - - return result.toString(); - } - - /* - * Create an abbreviated triple consisting of predicate and object; end with "." 
- */ - private String createTriplePOEnd(String predicate, String object) { - StringBuffer result = new StringBuffer(); + + /** + * Builds a subject abbreviated turtle triple. + * + * See Turtle + * + * @param result: The stringBuffer where the triple representation will be appended to. + * @param predicate: The RDF predicate. + * @param object: The RDF object. + * @param endSubjectRepeat: The marker to end the subject repetition symbol. + */ + private void createTriplePO(StringBuffer result, String predicate, String object, boolean endSubjectRepeat) { result.append(" "); result.append(predicate); result.append(" "); result.append(object); - result.append(" .\n"); + if (endSubjectRepeat) { + result.append(" .\n"); + } else { + result.append(" ;\n"); + } nrTriples++; - - return result.toString(); - } - - // Create a triple with two objects separated by an ",". the triple is ended with ";" - private String createTriplePOO(String predicate, String object1, String object2) { - StringBuffer result = new StringBuffer(); - result.append(" "); - result.append(predicate); - result.append(" "); - result.append(object1); - result.append(" , "); - result.append(object2); - result.append(" ;\n"); - - nrTriples = nrTriples + 2; - - return result.toString(); } - // Create a triple with two objects separated by an ",". the triple is ended with "." - private String createTriplePOOEnd(String predicate, String object1, String object2) { - StringBuffer result = new StringBuffer(); + /** + * Builds a subject abbreviated turtle triple with two objects. + * + * See Turtle + * + * @param result: The stringBuffer where the triple representation will be appended to. + * @param predicate: The RDF predicate. + * @param object1: The first RDF object. + * @param object2: The second RDF object. + * @param endSubjectRepeat: The marker to end the subject repetition symbol. 
+ */ + private void createTriplePOO(StringBuffer result, String predicate, String object1, String object2, boolean endSubjectRepeat) { result.append(" "); result.append(predicate); result.append(" "); result.append(object1); result.append(" , "); - result.append(object2); - result.append(" .\n"); + result.append(object2); + if (endSubjectRepeat) { + result.append(" .\n"); + } else { + result.append(" ;\n"); + } nrTriples = nrTriples + 2; - - return result.toString(); } - public void serialize() { + /** + * Ends the serialization. + */ + public void close() { try { for(int i=0;i. - * - * Copyright (C) 2011 OpenLink Software - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; only Version 2 of the License dated - * June 1991. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ -package ldbc.socialnet.dbgen.storage; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.ObjectOutputStream; - -import ldbc.socialnet.dbgen.objects.ReducedUserProfile; - - -public class MFStoreManager extends StorageManager { - - boolean isMultiFile = true; - int numFiles; - int lastCell; - - int numCellPerFile; - int numObjectPerFile; - - String mulpassOutUserProf[]; - FileOutputStream fos[]; - ObjectOutputStream oos[]; - int numSerializedObject; - int lastfileIdx = -2; - - public MFStoreManager(int _cellSize, int _windowSize, int pass, - int _lastCell, int _nFile, String _outUserProfile, String _baseDir){ - numSerializedObject = 0; - cellSize = _cellSize; - windowSize = _windowSize; - numFiles = _nFile; - lastCell = _lastCell; - outUserProfile = _outUserProfile; - baseDir = _baseDir; - - numCellPerFile = (lastCell + 1) / numFiles; - numObjectPerFile = numCellPerFile * cellSize; - - mulpassOutUserProf = new String[numFiles]; - fos = new FileOutputStream[numFiles]; - oos = new ObjectOutputStream[numFiles]; - } - - public void initSerialization(int pass) { - try { - numberSerializedObject = 0; - - for (int i = 0; i < numFiles; i++){ - mulpassOutUserProf[i] = pass + "_" + outUserProfile + "_" + i; - fos[i] = new FileOutputStream(baseDir + mulpassOutUserProf[i]); - oos[i] = new ObjectOutputStream(fos[i]); - } - } catch (IOException i) { - i.printStackTrace(); - } - } - - public void serialize(int from, int to, int pass, - ReducedUserProfile userProfiles[]) - { - int curfileIdx = -1; - - - curfileIdx = numberSerializedObject/numObjectPerFile; - - - // The last file may have more cells than other files - if (curfileIdx == numFiles) - curfileIdx = numFiles - 1; - - serializeReducedUserProfiles(from, to, pass, userProfiles, oos[curfileIdx]); - try { - oos[curfileIdx].flush(); - - // close the file - if ((lastfileIdx != curfileIdx) && (lastfileIdx > -1)) { - oos[lastfileIdx].close(); - fos[lastfileIdx].close(); - lastfileIdx = 
curfileIdx; - } - // For the first file - if (lastfileIdx != curfileIdx){ - lastfileIdx = curfileIdx; - } - - } catch (IOException e) { - // TODO Auto-generated catch block - System.out.println("Error while flushing the cell a users"); - e.printStackTrace(); - } - - } - public void serializeCellUsers(int pass, - ReducedUserProfile userProfiles[]) - { - int curfileIdx = -1; - - - curfileIdx = numberSerializedObject/numObjectPerFile; - - - // The last file may have more cells than other files - if (curfileIdx == numFiles) - curfileIdx = numFiles - 1; - - serializeReducedUserProfiles(0, cellSize, pass, userProfiles, oos[curfileIdx]); - - try { - oos[curfileIdx].flush(); - - // close the file - if ((lastfileIdx != curfileIdx) && (lastfileIdx > -1)) { - oos[lastfileIdx].close(); - fos[lastfileIdx].close(); - lastfileIdx = curfileIdx; - } - // For the first file - if (lastfileIdx != curfileIdx){ - lastfileIdx = curfileIdx; - } - - } catch (IOException e) { - // TODO Auto-generated catch block - System.out.println("Error while flushing the cell a users"); - e.printStackTrace(); - } - } - public void endSerialization() { - try { - fos[lastfileIdx].close(); - oos[lastfileIdx].close(); - } catch (Exception e) { - e.printStackTrace(); - } - } - public String[] getMulpassOutUserProf() { - return mulpassOutUserProf; - } - - public void setMulpassOutUserProf(String[] mulpassOutUserProf) { - this.mulpassOutUserProf = mulpassOutUserProf; - } - -} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/storage/StreamStoreManager.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/storage/StreamStoreManager.java index 21057bc68..efb4ed3bc 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/storage/StreamStoreManager.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/storage/StreamStoreManager.java @@ -44,8 +44,6 @@ import java.io.ObjectOutputStream; import ldbc.socialnet.dbgen.objects.GPS; -import 
ldbc.socialnet.dbgen.objects.PhotoStream; -import ldbc.socialnet.dbgen.objects.PostStream; public class StreamStoreManager { @@ -108,26 +106,6 @@ public void initSerialization() { } } - public void serialize(PostStream postStream){ - try { - oos.writeObject(postStream); - numberSerializedObject++; - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - public void serialize(PhotoStream photoStream){ - try { - oos.writeObject(photoStream); - numberSerializedObject++; - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - public void serialize(GPS gps){ try { oos.writeObject(gps); diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/util/ZOrder.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/util/ZOrder.java index ec418de23..f5f8e94c0 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/util/ZOrder.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/util/ZOrder.java @@ -37,58 +37,33 @@ package ldbc.socialnet.dbgen.util; -/** - * - * @author Minh-Duc Pham - * - */ - public class ZOrder { - /** - * @param args - */ - public int MAX_BIT_NO = 8; + private int MAX_BIT_NUMBER; - public static void main(String[] args) { - // TODO Auto-generated method stub - ZOrder zorder = new ZOrder(8); - zorder.getZValue(2, 0); - } - public ZOrder(int maxNumBit){ - this.MAX_BIT_NO = maxNumBit; + this.MAX_BIT_NUMBER = maxNumBit; } - public int getZValue(int x, int y){ - String sX = Integer.toBinaryString(x); - - - int numberToAddX = MAX_BIT_NO - sX.length(); + public int getZValue(int x, int y) { + + String sX = Integer.toBinaryString(x); + int numberToAddX = MAX_BIT_NUMBER - sX.length(); for (int i = 0; i < numberToAddX; i++){ sX = "0" + sX; } String sY = Integer.toBinaryString(y); - - int numberToAddY = MAX_BIT_NO - sY.length(); + int numberToAddY = MAX_BIT_NUMBER - sY.length(); for (int i = 0; i < numberToAddY; i++){ sY = "0" + sY; - } - - 
- //System.out.println(sX); - //System.out.println(sY); + } String sZ = ""; for (int i = 0; i < sX.length(); i++){ sZ = sZ + sX.substring(i, i+1) + "" + sY.substring(i, i+1); } - //System.out.println(sZ); - //System.out.println("The z-value is: " + Integer.parseInt(sZ, 2)); - return Integer.parseInt(sZ, 2); - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBP.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBP.java index 50a6466a5..87fe8720a 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBP.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBP.java @@ -36,30 +36,33 @@ */ package ldbc.socialnet.dbgen.vocabulary; +/** + * RDF dbpedia resource namespace used in the serialization process. + */ public class DBP { - public static final String NS = "http://dbpedia.org/resource/"; + public static final String NAMESPACE = "http://dbpedia.org/resource/"; public static final String PREFIX = "dbpedia:"; - public static final String FACTPREFIX = "dbpedia_"; + /** + * Gets the dbpedia resource prefix version of the input. + */ public static String prefixed(String string) { return PREFIX + string; } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } + /** + * Gets the dbpedia resource URL version of the input. + */ public static String getUrl(String string) { - return NS + string; + return NAMESPACE + string; } + /** + * Gets the dbpedia resource RDF-URL version of the input. 
+ */ public static String fullPrefixed(String string) { - return "<" + NS + string + ">"; + return "<" + NAMESPACE + string + ">"; } - - public static String getURI() { - return NS; - } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPOWL.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPOWL.java index ed1d55096..788ddcb57 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPOWL.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPOWL.java @@ -36,35 +36,39 @@ */ package ldbc.socialnet.dbgen.vocabulary; +/** + * RDF dbpedia ontology namespace used in the serialization process. + */ public class DBPOWL { - public static final String NS = "http://dbpedia.org/ontology/"; + + public static final String NAMESPACE = "http://dbpedia.org/ontology/"; public static final String PREFIX = "dbpedia-owl:"; - public static final String FACTPREFIX = "dbpedia-owl_"; + public static final String Place = PREFIX+"Place"; + public static final String City = PREFIX+"City"; + public static final String Country = PREFIX+"Country"; + public static final String Continent = PREFIX+"Continent"; + public static final String Organisation = PREFIX+"Organisation"; + public static final String partOf = PREFIX+"isPartOf"; + + /** + * Gets the dbpedia ontology prefix version of the input. + */ public static String prefixed(String string) { return PREFIX + string; } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } + /** + * Gets the dbpedia ontology URL version of the input. + */ public static String getUrl(String string) { - return NS + string; + return NAMESPACE + string; } + /** + * Gets the dbpedia ontology RDF-URL version of the input. 
+ */ public static String fullprefixed(String string) { - return "<" + NS + string + ">"; + return "<" + NAMESPACE + string + ">"; } - - public static String getURI() { - return NS; - } - - public static final String Place = PREFIX+"Place"; - public static final String City = PREFIX+"City"; - public static final String Country = PREFIX+"Country"; - public static final String Continent = PREFIX+"Continent"; - public static final String Organisation = PREFIX+"Organisation"; - public static final String partOf = PREFIX+"isPartOf"; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPPROP.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPPROP.java deleted file mode 100644 index b5819890d..000000000 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/DBPPROP.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2013 LDBC - * Linked Data Benchmark Council (http://ldbc.eu) - * - * This file is part of ldbc_socialnet_dbgen. - * - * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with ldbc_socialnet_dbgen. If not, see . - * - * Copyright (C) 2011 OpenLink Software - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; only Version 2 of the License dated - * June 1991. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -package ldbc.socialnet.dbgen.vocabulary; - -public class DBPPROP { - public static final String NS = "http://dbpedia.org/property/"; - public static final String PREFIX = "dbpedia-prop:"; - public static final String FACTPREFIX = "dbpedia-prop_"; - - public static String prefixed(String string) { - return PREFIX + string; - } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } - - public static String getUrl(String string) { - return NS + string; - } - - public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; - } - - public static final String Name = PREFIX+"name"; -} diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/FOAF.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/FOAF.java index d7f003afd..bdeec8e74 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/FOAF.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/FOAF.java @@ -36,30 +36,34 @@ */ package ldbc.socialnet.dbgen.vocabulary; +/** + * Friend of a friend project namespace used in the serialization process. 
+ */ public class FOAF { - public static final String NS = "http://xmlns.com/foaf/0.1/"; + + public static final String NAMESPACE = "http://xmlns.com/foaf/0.1/"; public static final String PREFIX = "foaf:"; - public static final String FACTPREFIX = "foaf_"; + + public static final String Name = PREFIX+"name"; + /** + * Gets the FOAF prefix version of the input. + */ public static String prefixed(String string) { return PREFIX + string; } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } + /** + * Gets the FOAF URL version of the input. + */ public static String getUrl(String string) { - return NS + string; + return NAMESPACE + string; } + /** + * Gets the FOAF RDF-URL version of the input. + */ public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; + return "<" + NAMESPACE + string + ">"; } - - public static final String Name = PREFIX+"name"; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDF.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDF.java index 5df558215..0b38bfe96 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDF.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDF.java @@ -36,32 +36,14 @@ */ package ldbc.socialnet.dbgen.vocabulary; -public class RDF{ - public static final String NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - public static final String PREFIX = "rdf:"; - public static final String FACTPREFIX = "rdf_"; - - public static String prefixed(String string) { - return PREFIX + string; - } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } - - public static String getUrl(String string) { - return NS + string; - } - - public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; - } - 
+/** + * RDF syntax namespace used in the serialization process. + */ +public class RDF { + public static final String NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public static final String PREFIX = "rdf:"; + //Resources public static final String Alt = PREFIX+"Alt"; public static final String Bag = PREFIX+"Bag"; @@ -79,4 +61,25 @@ public static String getURI() { public static final String object = PREFIX+"object"; public static final String type = PREFIX+"type"; public static final String value = PREFIX+"value"; + + /** + * Gets the RDF syntax prefix version of the input. + */ + public static String prefixed(String string) { + return PREFIX + string; + } + + /** + * Gets the RDF syntax URL version of the input. + */ + public static String getUrl(String string) { + return NAMESPACE + string; + } + + /** + * Gets the RDF syntax RDF-URL version of the input. + */ + public static String fullprefixed(String string) { + return "<" + NAMESPACE + string + ">"; + } } \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDFS.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDFS.java index c64e0c781..460205011 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDFS.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/RDFS.java @@ -36,33 +36,13 @@ */ package ldbc.socialnet.dbgen.vocabulary; -import java.util.HashMap; - +/** + * RDF schema namespace used in the serialization process. 
+ */ public class RDFS { - public static final String NS="http://www.w3.org/2000/01/rdf-schema#"; - public static final String PREFIX = "rdfs:"; - public static final String FACTPREFIX = "rdfs_"; - - public static String prefixed(String string) { - return PREFIX + string; - } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } - - public static String getUrl(String string) { - return NS + string; - } - - public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; - } + public static final String NAMESPACE="http://www.w3.org/2000/01/rdf-schema#"; + public static final String PREFIX = "rdfs:"; //Resources public static final String Datatype = PREFIX+"Datatype"; @@ -73,4 +53,25 @@ public static String getURI() { public static final String comment = PREFIX+"comment"; public static final String label = PREFIX+"label"; public static final String subClassOf = PREFIX+"subClassOf"; + + /** + * Gets the RDF schema prefix version of the input. + */ + public static String prefixed(String string) { + return PREFIX + string; + } + + /** + * Gets the RDF schema URL version of the input. + */ + public static String getUrl(String string) { + return NAMESPACE + string; + } + + /** + * Gets the RDF schema RDF-URL version of the input. + */ + public static String fullprefixed(String string) { + return "<" + NAMESPACE + string + ">"; + } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SN.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SN.java index 80faef4e3..a02a12b58 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SN.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SN.java @@ -36,79 +36,112 @@ */ package ldbc.socialnet.dbgen.vocabulary; +/** + * LDBC social network data namespace used in the serialization process. 
+ */ public class SN{ - public static int machineId; - public static final String NS = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/"; + + public static String machineId; + public static final String NAMESPACE = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/"; public static final String PREFIX = "sn:"; - public static final String FACTPREFIX = "sn_"; public static final String BLANK_NODE = "_:"; - public static void setMachineNumber(int machineId) - { - SN.machineId = machineId; + /** + * Sets the machine id. + * Used as a suffix in some SN entities to create unique IDs in parallel generation. + */ + public static void setMachineNumber(int machineId, int numMachines) { + int digits = 0; + do { + numMachines /= 10; + } while (numMachines != 0); + SN.machineId = String.valueOf(machineId); + for (int i = SN.machineId.length(); i < digits; i++) { + SN.machineId = '0' + SN.machineId; + } } - public static String prefixed(String string) { + /** + * Gets the LDBC social network data prefix version of the input. + */ + public static String prefixed(String string) { return PREFIX + string; } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } + /** + * Gets the LDBC social network data URL version of the input. + */ public static String getUrl(String string) { - return NS + string; + return NAMESPACE + string; } + /** + * Gets the LDBC social network data RDF-URL version of the input. + */ public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; + return "<" + NAMESPACE + string + ">"; } + /** + * Gets the person entity prefix. + */ public static String getPersonURI(long id) { return PREFIX+"pers"+id; } + /** + * Gets the forum entity prefix. 
+ */ public static String getForumURI(long id) { return PREFIX+"forum"+id + "" + machineId; } - public static String getGroupURI(long id) { - return PREFIX+"group"+id + "" + machineId; - } - + /** + * Gets the post entity prefix. + */ public static String getPostURI(long id) { return PREFIX+"post"+id + "" + machineId; } + /** + * Gets the comment entity prefix. + */ public static String getCommentURI(long id) { return PREFIX+"comm"+id + "" + machineId; } + /** + * Gets the membership relation prefix. + */ public static String getMembershipURI(long id) { return BLANK_NODE+"mbs"+id + "" + machineId; } + /** + * Gets the like relation prefix. + */ public static String getLikeURI(long id) { return BLANK_NODE+"like"+id + "" + machineId; } - public static String getSpeaksURI(long id) { - return BLANK_NODE+"speak"+id + "" + machineId; - } - + /** + * Gets the studyAt relation prefix. + */ public static String getStudyAtURI(long id) { return BLANK_NODE+"study"+id + "" + machineId; } + /** + * Gets the workAt relation prefix. + */ public static String getWorkAtURI(long id) { return BLANK_NODE+"work"+id + "" + machineId; } + /** + * Gets the true id having in consideration the machine. + */ public static String formId(long id) { - return id + "" + machineId; + return id + machineId; } } \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SNVOC.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SNVOC.java index 7b28fccf7..92e09cbd2 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SNVOC.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/SNVOC.java @@ -36,65 +36,48 @@ */ package ldbc.socialnet.dbgen.vocabulary; +/** + * LDBC social network vocabulary namespace used in the serialization process. 
+ */ public class SNVOC{ - public static final String NS = "http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/"; - public static final String PREFIX = "snvoc:"; - public static final String FACTPREFIX = "snvoc_"; - - public static String prefixed(String string) { - return PREFIX + string; - } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } - public static String getUrl(String string) { - return NS + string; - } - - public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; - } + public static final String NAMESPACE = "http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/"; + public static final String PREFIX = "snvoc:"; //person - public static final String Person = PREFIX+"Person"; - public static final String creationDate = PREFIX+"creationDate"; - public static final String firstName = PREFIX+"firstName"; - public static final String lastName = PREFIX+"lastName"; - public static final String gender = PREFIX+"gender"; - public static final String birthday = PREFIX+"birthday"; - public static final String email = PREFIX+"email"; - public static final String speaks = PREFIX+"speaks"; - public static final String browser = PREFIX+"browserUsed"; - public static final String ipaddress = PREFIX+"locationIP"; - public static final String locatedIn = PREFIX+"isLocatedIn"; - public static final String studyAt = PREFIX+"studyAt"; - public static final String workAt = PREFIX+"workAt"; - public static final String hasInterest = PREFIX+"hasInterest"; - public static final String like = PREFIX+"likes"; - public static final String knows = PREFIX+"knows"; - public static final String follows = PREFIX+"follows"; - public static final String classYear = PREFIX+"classYear"; - public static final String workFrom = PREFIX+"workFrom"; - public static final String hasOrganisation = PREFIX+"hasOrganisation"; - public static final String hasPost = PREFIX+"hasPost"; - - 
//Forum - public static final String Forum = PREFIX+"Forum"; - public static final String title = PREFIX+"title"; - public static final String hasModerator = PREFIX+"hasModerator"; - public static final String hasTag = PREFIX+"hasTag"; - public static final String hasMember = PREFIX+"hasMember"; - public static final String containerOf = PREFIX+"containerOf"; - public static final String hasPerson = PREFIX+"hasPerson"; - public static final String joinDate = PREFIX+"joinDate"; - - //Post & Comment + public static final String Person = PREFIX+"Person"; + public static final String creationDate = PREFIX+"creationDate"; + public static final String firstName = PREFIX+"firstName"; + public static final String lastName = PREFIX+"lastName"; + public static final String gender = PREFIX+"gender"; + public static final String birthday = PREFIX+"birthday"; + public static final String email = PREFIX+"email"; + public static final String speaks = PREFIX+"speaks"; + public static final String browser = PREFIX+"browserUsed"; + public static final String ipaddress = PREFIX+"locationIP"; + public static final String locatedIn = PREFIX+"isLocatedIn"; + public static final String studyAt = PREFIX+"studyAt"; + public static final String workAt = PREFIX+"workAt"; + public static final String hasInterest = PREFIX+"hasInterest"; + public static final String like = PREFIX+"likes"; + public static final String knows = PREFIX+"knows"; + public static final String follows = PREFIX+"follows"; + public static final String classYear = PREFIX+"classYear"; + public static final String workFrom = PREFIX+"workFrom"; + public static final String hasOrganisation = PREFIX+"hasOrganisation"; + public static final String hasPost = PREFIX+"hasPost"; + + //Forum + public static final String Forum = PREFIX+"Forum"; + public static final String title = PREFIX+"title"; + public static final String hasModerator = PREFIX+"hasModerator"; + public static final String hasTag = PREFIX+"hasTag"; + public static 
final String hasMember = PREFIX+"hasMember"; + public static final String containerOf = PREFIX+"containerOf"; + public static final String hasPerson = PREFIX+"hasPerson"; + public static final String joinDate = PREFIX+"joinDate"; + + //Post & Comment public static final String Post = PREFIX+"Post"; public static final String Comment = PREFIX+"Comment"; public static final String hasCreator = PREFIX+"hasCreator"; @@ -105,11 +88,29 @@ public static String getURI() { public static final String replyOf = PREFIX+"replyOf"; //Others - public static final String City = PREFIX+"City"; - public static final String Country = PREFIX+"Country"; - public static final String Region = PREFIX+"Region"; public static final String Organisation = PREFIX+"Organisation"; public static final String Tag = PREFIX+"Tag"; public static final String TagClass = PREFIX+"TagClass"; public static final String isPartOf = PREFIX+"isPartOf"; + + /** + * Gets the LDBC social network vocabulary prefix version of the input. + */ + public static String prefixed(String string) { + return PREFIX + string; + } + + /** + * Gets the LDBC social network vocabulary URL version of the input. + */ + public static String getUrl(String string) { + return NAMESPACE + string; + } + + /** + * Gets the LDBC social network vocabulary RDF-URL version of the input. + */ + public static String fullprefixed(String string) { + return "<" + NAMESPACE + string + ">"; + } } \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/XSD.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/XSD.java index ccdd2ef90..056426edd 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/XSD.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/vocabulary/XSD.java @@ -36,43 +36,45 @@ */ package ldbc.socialnet.dbgen.vocabulary; -import java.util.HashMap; - +/** + * XML schema namespace used in the serialization process. 
+ */ public class XSD { - public static final String NS = "http://www.w3.org/2001/XMLSchema#"; + + public static final String NAMESPACE = "http://www.w3.org/2001/XMLSchema#"; public static final String PREFIX = "xsd:"; - public static final String FACTPREFIX = "xsd_"; - - public static String prefixed(String string) { + //Resources + public static final String Integer = PREFIX + "integer"; + public static final String Int = PREFIX + "int"; + public static final String Float = PREFIX + "float"; + public static final String Double = PREFIX + "double"; + public static final String Long = PREFIX + "long"; + public static final String String = PREFIX + "string"; + public static final String Decimal = PREFIX + "decimal"; + public static final String Year = PREFIX + "gYear"; + public static final String Date = PREFIX + "date"; + public static final String DateTime = PREFIX + "dateTime"; + + + /** + * Gets the XML schema prefix version of the input. + */ + public static String prefixed(String string) { return PREFIX + string; } - - public static String factprefixed(String string) { - return FACTPREFIX + string; - } + /** + * Gets the XML schema URL version of the input. + */ public static String getUrl(String string) { - return NS + string; + return NAMESPACE + string; } + /** + * Gets the XML schema RDF-URL version of the input. 
+ */ public static String fullprefixed(String string) { - return "<" + NS + string + ">"; - } - - public static String getURI() { - return NS; + return "<" + NAMESPACE + string + ">"; } - - //Resources - public static final String Integer = PREFIX + "integer"; - public static final String Int = PREFIX + "int"; - public static final String Float = PREFIX + "float"; - public static final String Double = PREFIX + "double"; - public static final String Long = PREFIX + "long"; - public static final String String = PREFIX + "string"; - public static final String Decimal = PREFIX + "decimal"; - public static final String Year = PREFIX + "gYear"; - public static final String Date = PREFIX + "date"; - public static final String DateTime = PREFIX + "dateTime"; } \ No newline at end of file diff --git a/ldbc_socialnet_dbgen/src/main/resources/dictionaries/email.txt b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/email.txt index 9f2c78520..1ccb527a8 100644 --- a/ldbc_socialnet_dbgen/src/main/resources/dictionaries/email.txt +++ b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/email.txt @@ -136,7 +136,7 @@ e-mail.ru emailfast.com emails.ru e-mails.ru -eminemfans .com +eminemfans.com envirocitizen.com eritrea.cc eritrea.cc @@ -192,7 +192,7 @@ happyhippo.com hasakah.com hateinthebox.com hebron.tv -hip hopmail.com +hiphopmail.com homs.cc hotbox.ru hotmail.com @@ -299,7 +299,7 @@ netfingers.com net-surf.com nettaxi.com newmail.ru -ni cedriveway.com +nicedriveway.com nightmail.ru nm.ru nocharge.com @@ -370,7 +370,7 @@ rescueteam.com rockeros.com romance106fm.com rome.com -sa veourplanet.org +saveourplanet.org safat.biz safat.info safat.us From b00b965d8a4edff1c54c5b71f0975d2dee2c0051 Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 12 Dec 2013 17:02:58 +0100 Subject: [PATCH 05/10] removed run from repo --- run.sh | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100755 run.sh diff --git a/run.sh b/run.sh deleted file mode 100755 index 26d853fd9..000000000 --- 
a/run.sh +++ /dev/null @@ -1,14 +0,0 @@ -export HADOOP_HOME=/home/aprat/Programs/hadoop-1.0.3 #change to your hadoop folder -export LDBC_SOCIALNET_DBGEN_HOME=/home/aprat/Projects/LDBC/generador/fork/ldbc_socialnet_bm/ldbc_socialnet_dbgen #change to your ldbc_socialnet_dbgen folder -export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-amd64 -export NUM_MACHINES=1 - -mkdir $LDBC_SOCIALNET_DBGEN_HOME/outputDir -mvn -f $LDBC_SOCIALNET_DBGEN_HOME/pom.xml clean -mvn -f $LDBC_SOCIALNET_DBGEN_HOME/pom.xml assembly:assembly - -# para borrar ficheros temporales -cp $LDBC_SOCIALNET_DBGEN_HOME/target/ldbc_socialnet_dbgen.jar $LDBC_SOCIALNET_DBGEN_HOME/ -rm $LDBC_SOCIALNET_DBGEN_HOME/target/ldbc_socialnet_dbgen.jar - -$HADOOP_HOME/bin/hadoop jar $LDBC_SOCIALNET_DBGEN_HOME/ldbc_socialnet_dbgen.jar input/sib output/sib $NUM_MACHINES $LDBC_SOCIALNET_DBGEN_HOME/ $LDBC_SOCIALNET_DBGEN_HOME/outputDir/ From c831e862cc08cc30f8beee3012cc18903c5108a6 Mon Sep 17 00:00:00 2001 From: Arnau Date: Mon, 16 Dec 2013 16:59:31 +0100 Subject: [PATCH 06/10] fixed email generation. cleaned some code. --- .../dbgen/generator/ScalableGenerator.java | 289 ++++++------------ 1 file changed, 87 insertions(+), 202 deletions(-) diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java index 245565628..7a686892b 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java @@ -51,6 +51,7 @@ import java.util.Properties; import java.util.Random; import java.util.Vector; +import java.text.Normalizer; import ldbc.socialnet.dbgen.dictionary.BrowserDictionary; import ldbc.socialnet.dbgen.dictionary.CompanyDictionary; @@ -732,25 +733,19 @@ private void _init(int mapId, boolean isFullLoad) { * @param numCell The number of cells the generator will parse. 
*/ public void generateUserActivity(String inputFile, int numCell) { - long startPostGeneration = System.currentTimeMillis(); - //NOTE: Until this point of the code numtotalUser*2 forums where generated (2 for user) thats //the reason behind this forum id assignment. groupGenerator.setForumId((numtotalUser + 10) * 2); generatePostandPhoto(inputFile, numCell); - long endPostGeneration = System.currentTimeMillis(); System.out.println("Post generation takes " + getDurationInMinutes(startPostGeneration, endPostGeneration)); - long startGroupGeneration = System.currentTimeMillis(); generateAllGroup(inputFile, numCell); long endGroupGeneration = System.currentTimeMillis(); System.out.println("Group generation takes " + getDurationInMinutes(startGroupGeneration, endGroupGeneration)); - serializer.close(); writeStatistics(); - System.out.println("Number of generated triples " + serializer.unitsGenerated()); System.out.println("Number of popular users " + numPopularUser); System.out.println("Writing the data for test driver "); @@ -763,17 +758,13 @@ public void generateUserActivity(String inputFile, int numCell) { * @param numCells The number of cells the generator will parse. 
*/ public void generatePostandPhoto(String inputFile, int numCells) { - reducedUserProfilesCell = new ReducedUserProfile[cellSize]; StorageManager storeManager = new StorageManager(cellSize, windowSize, outUserProfile, sibOutputDir); - storeManager.initDeserialization(inputFile); System.out.println("Generating the posts & comments "); System.out.println("Number of cells in file : " + numCells); - for (int i = 0; i < numCells; i++) { storeManager.deserializeOneCellUserProfile(reducedUserProfilesCell); - for (int j = 0; j < cellSize; j++) { UserExtraInfo extraInfo = new UserExtraInfo(); setInfoFromUserProfile(reducedUserProfilesCell[j], extraInfo); @@ -783,7 +774,6 @@ public void generatePostandPhoto(String inputFile, int numCells) { generatePhoto(reducedUserProfilesCell[j], extraInfo); } } - storeManager.endDeserialization(); System.out.println("Done generating the posts and photos...."); System.out.println("Number of deserialized objects is " + storeManager.getNumberDeSerializedObject()); @@ -797,15 +787,11 @@ public void generatePostandPhoto(String inputFile, int numCells) { */ public void generateAllGroup(String inputFile, int numCell) { groupStoreManager = new StorageManager(cellSize, windowSize, outUserProfile, sibOutputDir); - groupStoreManager.initDeserialization(inputFile); - for (int i = 0; i < numCell; i++) { generateGroups(4, i, numCell); } - groupStoreManager.endDeserialization(); - System.out.println("Done generating user groups and groups' posts"); System.out.println("Number of deserialized objects for group is " + groupStoreManager.getNumberDeSerializedObject()); } @@ -818,29 +804,22 @@ public void generateAllGroup(String inputFile, int numCell) { * @param numCell The total number of cells. 
*/ public void generateGroups(int pass, int cellPos, int numCell) { - int newCellPosInWindow = cellPos % numberOfCellPerWindow; int newIdxInWindow = newCellPosInWindow * cellSize; int newStartIndex = (cellPos % numberOfCellPerWindow) * cellSize; - groupStoreManager.deserializeOneCellUserProfile(newIdxInWindow, cellSize, reducedUserProfiles); - for (int i = 0; i < cellSize; i++) { int curIdxInWindow = newStartIndex + i; double moderatorProb = randGroupModerator.nextDouble(); - - if (moderatorProb > groupModeratorProb) { - continue; - } - - Friend firstLevelFriends[] = reducedUserProfiles[curIdxInWindow].getFriendList(); - Vector secondLevelFriends = new Vector(); - //TODO: Include friends of friends a.k.a second level friends? - - int numGroup = randNumberGroup.nextInt(maxNumGroupCreatedPerUser); - for (int j = 0; j < numGroup; j++) { - createGroupForUser(reducedUserProfiles[curIdxInWindow], - firstLevelFriends, secondLevelFriends); + if (moderatorProb <= groupModeratorProb) { + Friend firstLevelFriends[] = reducedUserProfiles[curIdxInWindow].getFriendList(); + Vector secondLevelFriends = new Vector(); + //TODO: Include friends of friends a.k.a second level friends? 
+ int numGroup = randNumberGroup.nextInt(maxNumGroupCreatedPerUser); + for (int j = 0; j < numGroup; j++) { + createGroupForUser(reducedUserProfiles[curIdxInWindow], + firstLevelFriends, secondLevelFriends); + } } } } @@ -848,42 +827,31 @@ public void generateGroups(int pass, int cellPos, int numCell) { public void pushUserProfile(ReducedUserProfile reduceUser, int pass, Reducer.Context context, boolean isContext, ObjectOutputStream oos){ + numUserProfilesRead++; + ReducedUserProfile userObject = new ReducedUserProfile(); + userObject.copyFields(reduceUser); + if (numUserProfilesRead <= windowSize) { + reducedUserProfiles[numUserProfilesRead-1] = userObject; + if( numUserProfilesRead == windowSize ) { + mr2InitFriendShipWindow(pass, context, isContext, oos); + } + } else { + numUserForNewCell++; + cellReducedUserProfiles[numUserForNewCell-1] = userObject; + if (numUserForNewCell == cellSize){ + mrCurCellPost++; + mr2SlideFriendShipWindow( pass, + mrCurCellPost, + context, + cellReducedUserProfiles, + isContext, + oos); + numUserForNewCell = 0; + } + } + reduceUser = null; + } - numUserProfilesRead++; - ReducedUserProfile userObject = new ReducedUserProfile(); - userObject.copyFields(reduceUser); - - if (numUserProfilesRead < windowSize) { - if (reducedUserProfiles[numUserProfilesRead-1] != null){ - reducedUserProfiles[numUserProfilesRead-1].clear(); - reducedUserProfiles[numUserProfilesRead-1] = null; - } - reducedUserProfiles[numUserProfilesRead-1] = userObject; - } else if (numUserProfilesRead == windowSize) { - if (reducedUserProfiles[numUserProfilesRead-1] != null){ - reducedUserProfiles[numUserProfilesRead-1].clear(); - reducedUserProfiles[numUserProfilesRead-1] = null; - } - reducedUserProfiles[numUserProfilesRead-1] = userObject; - mr2InitFriendShipWindow(pass, context, isContext, oos); - } else { - numUserForNewCell++; - - if (cellReducedUserProfiles[numUserForNewCell-1] != null){ - cellReducedUserProfiles[numUserForNewCell-1] = null; - } - - 
cellReducedUserProfiles[numUserForNewCell-1] = userObject; - if (numUserForNewCell == cellSize){ - mrCurCellPost++; - mr2SlideFriendShipWindow(pass,mrCurCellPost, context, cellReducedUserProfiles, - isContext, oos); - - numUserForNewCell = 0; - } - } - reduceUser = null; - } public void pushAllRemainingUser(int pass, Reducer.Context context, boolean isContext, ObjectOutputStream oos){ @@ -933,176 +901,89 @@ public void generateCellOfUsers2(int newStartIndex, ReducedUserProfile[] _cellRe } public void mr2InitFriendShipWindow(int pass, Reducer.Context context, boolean isContext, ObjectOutputStream oos){ - for (int i = 0; i < cellSize; i++) { // From this user, check all the user in the window to create friendship - for (int j = i + 1; j < windowSize - 1; j++) { - if (reducedUserProfiles[i].getNumFriendsAdded() - == reducedUserProfiles[i].getNumFriends(pass)) { - break; - } - if (reducedUserProfiles[j].getNumFriendsAdded() - == reducedUserProfiles[j].getNumFriends(pass)) { - continue; - } - - if (reducedUserProfiles[i].isExistFriend( - reducedUserProfiles[j].getAccountId())) { - continue; + for (int j = i + 1; (j < windowSize - 1) && + (reducedUserProfiles[i].getNumFriendsAdded() != reducedUserProfiles[i].getNumFriends(pass)); + j++) { + if (!((reducedUserProfiles[j].getNumFriendsAdded() + == reducedUserProfiles[j].getNumFriends(pass)) || + reducedUserProfiles[i].isExistFriend(reducedUserProfiles[j].getAccountId()))) { + double randProb = randUniform.nextDouble(); + double prob = getFriendCreatePro(i, j, pass); + if ((randProb < prob) || (randProb < limitProCorrelated)) { + createFriendShip(reducedUserProfiles[i], reducedUserProfiles[j], (byte) pass); + } } - - // Generate a random value - double randProb = randUniform.nextDouble(); - - double prob = getFriendCreatePro(i, j, pass); - - if ((randProb < prob) || (randProb < limitProCorrelated)) { - // add a friendship - createFriendShip(reducedUserProfiles[i], reducedUserProfiles[j], (byte) pass); - } - - } + } } - 
updateLastPassFriendAdded(0, cellSize, pass); - mrWriter.writeReducedUserProfiles(0, cellSize, pass, reducedUserProfiles, context, isContext, oos); - exactOutput = exactOutput + cellSize; } public void mr2SlideFriendShipWindow(int pass, int cellPos, Reducer.Context context, ReducedUserProfile[] _cellReduceUserProfiles, boolean isContext, ObjectOutputStream oos){ - // In window, position of new cell = the position of last removed cell = cellPos - 1 int newCellPosInWindow = (cellPos - 1) % numberOfCellPerWindow; - int newStartIndex = newCellPosInWindow * cellSize; - - int curIdxInWindow; - // Init the number of friends for each user in the new cell generateCellOfUsers2(newStartIndex, _cellReduceUserProfiles); - // Create the friendships // Start from each user in the first cell of the window --> at the // cellPos, not from the new cell newStartIndex = (cellPos % numberOfCellPerWindow) * cellSize; for (int i = 0; i < cellSize; i++) { - curIdxInWindow = newStartIndex + i; - // Generate set of friends list - - // Here assume that all the users in the window including the new - // cell have the number of friends - // and also the number of friends to add - - double randProb; - - if (reducedUserProfiles[curIdxInWindow].getNumFriendsAdded() - == reducedUserProfiles[curIdxInWindow].getNumFriends(pass)) { - continue; - } - + int curIdxInWindow = newStartIndex + i; // From this user, check all the user in the window to create friendship - for (int j = i + 1; (j < windowSize - 1) - && reducedUserProfiles[curIdxInWindow].getNumFriendsAdded() - < reducedUserProfiles[curIdxInWindow].getNumFriends(pass); j++) { - + for (int j = i + 1; (j < windowSize - 1) && reducedUserProfiles[curIdxInWindow].getNumFriendsAdded() < reducedUserProfiles[curIdxInWindow].getNumFriends(pass); j++) { int checkFriendIdx = (curIdxInWindow + j) % windowSize; - - if (reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() - == reducedUserProfiles[checkFriendIdx].getNumFriends(pass)) { - continue; - } - - 
if (reducedUserProfiles[curIdxInWindow].isExistFriend( - reducedUserProfiles[checkFriendIdx].getAccountId())) { - continue; + if ( !(reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() + == reducedUserProfiles[checkFriendIdx].getNumFriends(pass) || + reducedUserProfiles[curIdxInWindow].isExistFriend(reducedUserProfiles[checkFriendIdx].getAccountId()))) { + double randProb = randUniform.nextDouble(); + double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); + if ((randProb < prob) || (randProb < limitProCorrelated)) { + createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], (byte) pass); + } } - - - // Generate a random value - randProb = randUniform.nextDouble(); - - double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); - - if ((randProb < prob) || (randProb < limitProCorrelated)) { - // add a friendship - createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], - (byte) pass); - } } - } - updateLastPassFriendAdded(newStartIndex, newStartIndex + cellSize, pass); mrWriter.writeReducedUserProfiles(newStartIndex, newStartIndex + cellSize, pass, reducedUserProfiles, context, isContext, oos); - exactOutput = exactOutput + cellSize; } public void mr2SlideLastCellsFriendShip(int pass, int cellPos, int numleftCell, Reducer.Context context, boolean isContext, ObjectOutputStream oos) { - int newStartIndex; - int curIdxInWindow; - newStartIndex = (cellPos % numberOfCellPerWindow) * cellSize; - for (int i = 0; i < cellSize; i++) { curIdxInWindow = newStartIndex + i; - // Generate set of friends list - - // Here assume that all the users in the window including the new - // cell have the number of friends - // and also the number of friends to add - - double randProb; - - if (reducedUserProfiles[curIdxInWindow].getNumFriendsAdded() - == reducedUserProfiles[curIdxInWindow].getNumFriends(pass)) { - continue; - } - // From this user, check all the user in the window to 
create friendship for (int j = i + 1; (j < numleftCell * cellSize - 1) && reducedUserProfiles[curIdxInWindow].getNumFriendsAdded() < reducedUserProfiles[curIdxInWindow].getNumFriends(pass); j++) { - int checkFriendIdx = (curIdxInWindow + j) % windowSize; - - if (reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() - == reducedUserProfiles[checkFriendIdx].getNumFriends(pass)) { - continue; - } - - if (reducedUserProfiles[curIdxInWindow].isExistFriend( - reducedUserProfiles[checkFriendIdx].getAccountId())) { - continue; + if ( !(reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() + == reducedUserProfiles[checkFriendIdx].getNumFriends(pass) || + reducedUserProfiles[curIdxInWindow].isExistFriend(reducedUserProfiles[checkFriendIdx].getAccountId()))) { + double randProb = randUniform.nextDouble(); + double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); + if ((randProb < prob) || (randProb < limitProCorrelated)) { + createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], + (byte) pass); + } } - - - // Generate a random value - randProb = randUniform.nextDouble(); - - double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); - - if ((randProb < prob) || (randProb < limitProCorrelated)) { - // add a friendship - createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], - (byte) pass); - } } - } updateLastPassFriendAdded(newStartIndex, newStartIndex + cellSize, pass); mrWriter.writeReducedUserProfiles(newStartIndex, newStartIndex + cellSize, pass, reducedUserProfiles, context, isContext, oos); exactOutput = exactOutput + cellSize; - } public void generatePosts(ReducedUserProfile user, UserExtraInfo extraInfo){ @@ -1226,23 +1107,22 @@ public void createGroupForUser(ReducedUserProfile user, } } } else if (randLevelProb < levelProbs[1]) { // ==> level 2 - if (secondLevelFriends.size() == 0) - continue; - - int friendIdx = 
randMemberIdxSelector.nextInt(secondLevelFriends.size()); - int potentialMemberAcc = secondLevelFriends.get(friendIdx).getFriendAcc(); - randMemberProb = randMembership.nextDouble(); - if (randMemberProb < joinProbs[1]) { - // Check whether this user has been added and then add to the group - if (!memberIds.contains(potentialMemberAcc)) { - memberIds.add(potentialMemberAcc); - // Assume the earliest membership date is the friendship created date - GroupMemberShip memberShip = groupGenerator.createGroupMember( - potentialMemberAcc, group.getCreatedDate(), - secondLevelFriends.get(friendIdx)); - group.addMember(memberShip); - } - } + if (secondLevelFriends.size() != 0) { + int friendIdx = randMemberIdxSelector.nextInt(secondLevelFriends.size()); + int potentialMemberAcc = secondLevelFriends.get(friendIdx).getFriendAcc(); + randMemberProb = randMembership.nextDouble(); + if (randMemberProb < joinProbs[1]) { + // Check whether this user has been added and then add to the group + if (!memberIds.contains(potentialMemberAcc)) { + memberIds.add(potentialMemberAcc); + // Assume the earliest membership date is the friendship created date + GroupMemberShip memberShip = groupGenerator.createGroupMember( + potentialMemberAcc, group.getCreatedDate(), + secondLevelFriends.get(friendIdx)); + group.addMember(memberShip); + } + } + } } else { // ==> random users // Select a user from window int friendIdx = randMemberIdxSelector.nextInt(windowSize); @@ -1486,7 +1366,12 @@ public void setInfoFromUserProfile(ReducedUserProfile user, int numEmails = randomExtraInfo.nextInt(maxEmails) + 1; double prob = randomExtraInfo.nextDouble(); if (prob >= missingRatio) { - String base = userExtraInfo.getFirstName().replaceAll(" ", "."); + String base = userExtraInfo.getFirstName(); + base = Normalizer.normalize(base,Normalizer.Form.NFD); + base = base.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); + base = base.replaceAll(" ", "."); + base = base.replaceAll("[.]+", "."); + for (int i = 0; i < 
numEmails; i++) { String email = base + "" + user.getAccountId() + "@" + emailDic.getRandomEmail(); userExtraInfo.addEmail(email); From 44d7f146264cc87748c67483988eabdac6b08fbc Mon Sep 17 00:00:00 2001 From: Arnau Date: Wed, 8 Jan 2014 09:47:10 +0100 Subject: [PATCH 07/10] removed remaining HashSets --- .../socialnet/dbgen/dictionary/TagMatrix.java | 6 +- .../dbgen/dictionary/TagTextDictionary.java | 8 +- .../dbgen/generator/GPSGenerator.java | 4 +- .../dbgen/generator/GroupGenerator.java | 4 +- .../dbgen/generator/MRGenerateUsers.java | 6 +- .../dbgen/generator/PhotoGenerator.java | 4 +- .../dbgen/generator/ScalableGenerator.java | 265 ++++++++++-------- .../ldbc/socialnet/dbgen/objects/Photo.java | 8 +- .../ldbc/socialnet/dbgen/objects/Post.java | 8 +- .../dbgen/objects/ReducedUserProfile.java | 30 +- .../dbgen/objects/UserExtraInfo.java | 8 +- .../socialnet/dbgen/objects/UserProfile.java | 14 +- .../dbgen/serializer/Statistics.java | 18 +- 13 files changed, 209 insertions(+), 174 deletions(-) diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java index 61d8c0efa..d03022420 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagMatrix.java @@ -38,7 +38,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Vector; import java.util.Random; @@ -89,8 +89,8 @@ public void initMatrix() { } // Combine the main tag and related tags - public HashSet getSetofTags(int celebrityId, int numTags){ - HashSet resultTags = new HashSet(); + public TreeSet getSetofTags(int celebrityId, int numTags){ + TreeSet resultTags = new TreeSet(); resultTags.add(celebrityId); while (resultTags.size() < numTags) { diff --git 
a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java index 56acf955c..8fddcf231 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/TagTextDictionary.java @@ -40,7 +40,7 @@ import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Iterator; import java.util.Random; @@ -116,7 +116,7 @@ public String getTagText(int id) { return tagText.get(id); } - public String getRandomText(HashSet tags) { + public String getRandomText(TreeSet tags) { int textSize; int startingPos; @@ -218,7 +218,7 @@ public Post createPost(ReducedUserProfile user, int maxNumberOfLikes, post.setIpAddress(ipAddDic.getIP(user.getIpAddress(), user.isFrequentChange(), post.getCreatedDate())); post.setBrowserIdx(browserDic.getPostBrowserId(user.getBrowserIdx())); - HashSet tags = new HashSet(); + TreeSet tags = new TreeSet(); Iterator it = user.getSetOfTags().iterator(); while (it.hasNext()) { Integer value = it.next(); @@ -263,7 +263,7 @@ public Post createPost(Group group, int maxNumberOfLikes, post.setIpAddress(ipAddDic.getIP(memberShip.getIP(), memberShip.isFrequentChange(), post.getCreatedDate())); post.setBrowserIdx(browserDic.getPostBrowserId(memberShip.getBrowserIdx())); - HashSet tags = new HashSet(); + TreeSet tags = new TreeSet(); for (int i = 0; i < group.getTags().length; i++) { tags.add(group.getTags()[i]); } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java index a9bedb830..053a63477 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java +++ 
b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Iterator; import java.util.Random; import java.util.Vector; @@ -123,7 +123,7 @@ public void generateAllGPSForAllEvents(StreamStoreManager storeMng){ for (int i = 0; i < eventSet.size(); i++){ int numAttendedUsers = randNumUser.nextInt(100) + 20; //int lastUserId = 0; - HashSet attendedUsers = new HashSet(numAttendedUsers); + TreeSet attendedUsers = new TreeSet(numAttendedUsers); while (attendedUsers.size() < numAttendedUsers){ //int step = randNumUser.nextInt(totalNumUsers - numAttendedUsers- lastUserId + j); //lastUserId = lastUserId + step + j; diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java index 693826602..3d38ac90a 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GroupGenerator.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Iterator; import java.util.Random; import java.util.Vector; @@ -87,7 +87,7 @@ public Group createGroup(ReducedUserProfile user){ //Use the user location for group locationIdx group.setLocationIdx(user.getLocationIdx()); - HashSet tagSet = user.getSetOfTags(); + TreeSet tagSet = user.getSetOfTags(); Iterator iter = tagSet.iterator(); int idx = randGroupInterest.nextInt(tagSet.size()); for (int i = 0; i < idx; i++){ diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java index 61a9dd358..d04b03c70 100644 --- 
a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java @@ -102,7 +102,7 @@ public void map(LongWritable key, Text value, Context context) // Generate user information generator.mrGenerateUserInfo(pass, context, fileIdx); - System.out.println("Total friendship number from " + fileIdx + " : " + generator.friendshipNo); + System.out.println("Total friendship number from " + fileIdx + " : " + generator.friendshipNum); } } @@ -150,7 +150,7 @@ protected void cleanup(Context context){ System.out.println("Summary for " + attempTaskId); System.out.println("Number of user profile read " + friendGenerator.numUserProfilesRead); System.out.println("Number of exact user profile out " + friendGenerator.exactOutput); - System.out.println("Number of exact friend added " + friendGenerator.friendshipNo); + System.out.println("Number of exact friend added " + friendGenerator.friendshipNum); } } @@ -230,7 +230,7 @@ protected void cleanup(Context context){ System.out.println("Summary for " + attempTaskId); System.out.println("Number of user profile read " + friendGenerator.numUserProfilesRead); System.out.println("Number of exact user profile out " + friendGenerator.exactOutput); - System.out.println("Number of exact friend added " + friendGenerator.friendshipNo); + System.out.println("Number of exact friend added " + friendGenerator.friendshipNum); } } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java index 8354fa2aa..f069aebb9 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/PhotoGenerator.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.generator; -import java.util.HashSet; +import 
java.util.TreeSet; import java.util.Random; import ldbc.socialnet.dbgen.dictionary.LocationDictionary; @@ -121,7 +121,7 @@ public Photo generatePhoto(ReducedUserProfile user, Group album, //Assume that the photo are created one by one after 1 second from the creation of the album photo.setTakenTime(album.getCreatedDate() + 1000*(idxInAlbum+1)); - HashSet tags = new HashSet(); + TreeSet tags = new TreeSet(); // Iterator it = user.getSetOfTags().iterator(); // while (it.hasNext()) { // Integer value = it.next(); diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java index 7a686892b..46c85fd22 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java @@ -47,7 +47,7 @@ import java.io.ObjectOutputStream; import java.io.Writer; import java.util.GregorianCalendar; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Properties; import java.util.Random; import java.util.Vector; @@ -124,24 +124,29 @@ public enum OrganisationType { //schema behaviour paramaters which are not in the private parameter file. //If any of these are upgraded to the parameter file copy the variable elsewhere below, //so this list is updated with only the variables not appearing in the parameter file. + /** * Power Law distribution alpha value */ private static final double alpha = 0.4; + /** + * The maximum number of likes + */ private static final int maxNumLikes = 10; + /** * Cumulative probability to join a group for the user direct friends, friends of friends and * friends of the friends of the user friend. */ private static final double levelProbs[] = { 0.5, 0.8, 1.0 }; + /** * Probability to join a group for the user direct friends, friends of friends and * friends of the friends of the user friend. 
*/ private static final double joinProbs[] = { 0.7, 0.4, 0.1 }; - //Files and folders private static final String DICTIONARY_DIRECTORY = "/dictionaries/"; private static final String IPZONE_DIRECTORY = "/ipaddrByCountries"; @@ -276,12 +281,12 @@ public enum OrganisationType { int numCellRead = 0; int numCellPerfile; int numCellInLastFile; - HashSet selectedFileIdx; + TreeSet selectedFileIdx; // For friendship generation - int friendshipNo; - int minNoFriends; - int maxNoFriends; + int friendshipNum; + int minNumFriends; + int maxNumFriends; double friendRejectRatio; double friendReApproveRatio; @@ -290,50 +295,50 @@ public enum OrganisationType { MRWriter mrWriter; - Random randFriendReject; - Random randFriendReapprov; - Random randInitiator; + Random randomFriendReject; + Random randomFriendAproval; + Random randomInitiator; double baseProbCorrelated; double limitProCorrelated; // For each user - int minNoTagsPerUser; - int maxNoTagsPerUser; + int minNumTagsPerUser; + int maxNumTagsPerUser; int maxNumPostPerMonth; int maxNumComments; // Random values generators - PowerDistGenerator randPowerlaw; - PowerDistGenerator randTagPowerlaw; + PowerDistGenerator randomPowerLaw; + PowerDistGenerator randomTagPowerLaw; Long[] seeds; - Random randUniform; - Random randNumInterest; - Random randNumTags; + Random randomUniform; + Random randomNumInterests; + Random randomNumTags; Random randomFriendIdx; - Random randNumberPost; - Random randNumberComments; - Random randNumberPhotoAlbum; - Random randNumberPhotos; - Random randNumberGroup; - Random randNumberUserPerGroup; - Random randGender; - Random randUserRandomIdx; // For generating the random dimension - Random randNumPopularPlaces; - Random randFriendLevelSelect; // For select an level of moderator's friendship - Random randMembership; // For deciding whether or not a user is joined - Random randMemberIdxSelector; - Random randGroupMemStep; - Random randGroupModerator; // Decide whether a user can be moderator of groups 
or not - Random randNumberGroupPost; + Random randomNumPosts; + Random randomNumComments; + Random randomNumPhotoAlbums; + Random randomNumPhotos; + Random randomNumGroups; + Random randomNumUsersPerGroup; + Random randomGender; + Random randomUserRandomIdx; // For generating the random dimension + Random randomNumPopularPlaces; + Random randomFriendLevelSelect; // For select an level of moderator's friendship + Random randomMembership; // For deciding whether or not a user is joined + Random randomMemberIdxSelector; + Random randomGroupMemStep; + Random randomGroupModerator; // Decide whether a user can be moderator of groups or not + Random randomNumGroupPosts; Random randomExtraInfo; Random randomExactLongLat; Random randomHaveStatus; Random randomStatusSingle; Random randomStatus; - Random randUserAgent; - Random randFileSelect; - Random randIdsInWindow; + Random randomUserAgent; + Random randomFileSelect; + Random randomIdsInWindow; DateGenerator dateTimeGenerator; int startYear; @@ -400,25 +405,25 @@ public enum OrganisationType { String sibHomeDir; // For user's extra info - double missingRatio; - double missingStatusRatio; - double probSingleStatus; // Status "Single" has more probability than others' + double missingRatio; + double missingStatusRatio; + double probSingleStatus; // Status "Single" has more probability than others' - double probAnotherBrowser; - double probHavingSmartPhone; - double probSentFromAgent; + double probAnotherBrowser; + double probHavingSmartPhone; + double probSentFromAgent; // The probability that normal user posts from different location - double probDiffIPinTravelSeason; // in travel season - double probDiffIPnotTravelSeason; // not in travel season + double probDiffIPinTravelSeason; // in travel season + double probDiffIPnotTravelSeason; // not in travel season // The probability that travellers post from different location - double probDiffIPforTraveller; + double probDiffIPforTraveller; // Writing data for test driver - 
int thresholdPopularUser = 40; - int numPopularUser = 0; + int thresholdPopularUser = 40; + int numPopularUser = 0; // Data accessed from the hadoop jobs public ReducedUserProfile[] cellReducedUserProfiles; @@ -461,6 +466,11 @@ public ScalableGenerator(int mapreduceFileId, String sibOutputDir, String sibHom System.out.println("Current directory in ScaleGenerator is " + sibHomeDir); } + /** + * Gets the size of the cell. + * + * @return The size of the cell. + */ public int getCellSize() { return cellSize; } @@ -505,13 +515,13 @@ private void loadParamsFromFile() { cellSize = Short.parseShort(properties.getProperty(CELL_SIZE)); numberOfCellPerWindow = Integer.parseInt(properties.getProperty(NUM_CELL_WINDOW)); - minNoFriends = Integer.parseInt(properties.getProperty(MIN_FRIENDS)); - maxNoFriends = Integer.parseInt(properties.getProperty(MAX_FRIENDS)); - thresholdPopularUser = (int) (maxNoFriends * 0.9); + minNumFriends = Integer.parseInt(properties.getProperty(MIN_FRIENDS)); + maxNumFriends = Integer.parseInt(properties.getProperty(MAX_FRIENDS)); + thresholdPopularUser = (int) (maxNumFriends * 0.9); friendRejectRatio = Double.parseDouble(properties.getProperty(FRIEND_REJECT)); friendReApproveRatio = Double.parseDouble(properties.getProperty(FRIEND_REACCEPT)); - minNoTagsPerUser= Integer.parseInt(properties.getProperty(USER_MIN_TAGS)); - maxNoTagsPerUser= Integer.parseInt(properties.getProperty(USER_MAX_TAGS)); + minNumTagsPerUser= Integer.parseInt(properties.getProperty(USER_MIN_TAGS)); + maxNumTagsPerUser= Integer.parseInt(properties.getProperty(USER_MAX_TAGS)); maxNumPostPerMonth = Integer.parseInt(properties.getProperty(USER_MAX_POST_MONTH)); maxNumComments = Integer.parseInt(properties.getProperty(MAX_COMMENT_POST)); limitProCorrelated = Double.parseDouble(properties.getProperty(LIMIT_CORRELATED)); @@ -609,38 +619,38 @@ private void _init(int mapId, boolean isFullLoad) { } seedGenerate(mapId); - randPowerlaw = new PowerDistGenerator(minNoFriends, maxNoFriends +
1, alpha, seeds[2]); - randTagPowerlaw = new PowerDistGenerator(minNoTagsPerUser, maxNoTagsPerUser + 1, alpha, seeds[2]); - randUniform = new Random(seeds[3]); - randGender = new Random(seeds[3]); - randNumInterest = new Random(seeds[4]); - randNumTags = new Random(seeds[4]); + randomPowerLaw = new PowerDistGenerator(minNumFriends, maxNumFriends + 1, alpha, seeds[2]); + randomTagPowerLaw = new PowerDistGenerator(minNumTagsPerUser, maxNumTagsPerUser + 1, alpha, seeds[2]); + randomUniform = new Random(seeds[3]); + randomGender = new Random(seeds[3]); + randomNumInterests = new Random(seeds[4]); + randomNumTags = new Random(seeds[4]); randomFriendIdx = new Random(seeds[6]); - randFileSelect = new Random(seeds[7]); - randIdsInWindow = new Random(seeds[8]); - randNumberPost = new Random(seeds[9]); - randNumberComments = new Random(seeds[10]); - randNumberPhotoAlbum = new Random(seeds[11]); - randNumberPhotos = new Random(seeds[12]); - randNumberGroup = new Random(seeds[13]); - randNumberUserPerGroup = new Random(seeds[14]); - randMemberIdxSelector = new Random(seeds[18]); - randGroupMemStep = new Random(seeds[19]); - randFriendLevelSelect = new Random(seeds[20]); - randMembership = new Random(seeds[21]); - randGroupModerator = new Random(seeds[22]); + randomFileSelect = new Random(seeds[7]); + randomIdsInWindow = new Random(seeds[8]); + randomNumPosts = new Random(seeds[9]); + randomNumComments = new Random(seeds[10]); + randomNumPhotoAlbums = new Random(seeds[11]); + randomNumPhotos = new Random(seeds[12]); + randomNumGroups = new Random(seeds[13]); + randomNumUsersPerGroup = new Random(seeds[14]); + randomMemberIdxSelector = new Random(seeds[18]); + randomGroupMemStep = new Random(seeds[19]); + randomFriendLevelSelect = new Random(seeds[20]); + randomMembership = new Random(seeds[21]); + randomGroupModerator = new Random(seeds[22]); randomExtraInfo = new Random(seeds[27]); randomExactLongLat = new Random(seeds[27]); - randUserAgent = new Random(seeds[29]); - 
randNumberGroupPost = new Random(seeds[36]); - randFriendReject = new Random(seeds[37]); - randFriendReapprov = new Random(seeds[38]); - randInitiator = new Random(seeds[39]); + randomUserAgent = new Random(seeds[29]); + randomNumGroupPosts = new Random(seeds[36]); + randomFriendReject = new Random(seeds[37]); + randomFriendAproval = new Random(seeds[38]); + randomInitiator = new Random(seeds[39]); randomHaveStatus = new Random(seeds[41]); randomStatusSingle = new Random(seeds[42]); randomStatus = new Random(seeds[43]); - randNumPopularPlaces = new Random(seeds[47]); - randUserRandomIdx = new Random(seeds[48]); + randomNumPopularPlaces = new Random(seeds[47]); + randomUserRandomIdx = new Random(seeds[48]); reducedUserProfiles = new ReducedUserProfile[windowSize]; // Collect of reduced user profile cellReducedUserProfiles = new ReducedUserProfile[cellSize]; @@ -810,12 +820,12 @@ public void generateGroups(int pass, int cellPos, int numCell) { groupStoreManager.deserializeOneCellUserProfile(newIdxInWindow, cellSize, reducedUserProfiles); for (int i = 0; i < cellSize; i++) { int curIdxInWindow = newStartIndex + i; - double moderatorProb = randGroupModerator.nextDouble(); + double moderatorProb = randomGroupModerator.nextDouble(); if (moderatorProb <= groupModeratorProb) { Friend firstLevelFriends[] = reducedUserProfiles[curIdxInWindow].getFriendList(); Vector secondLevelFriends = new Vector(); //TODO: Include friends of friends a.k.a second level friends? - int numGroup = randNumberGroup.nextInt(maxNumGroupCreatedPerUser); + int numGroup = randomNumGroups.nextInt(maxNumGroupCreatedPerUser); for (int j = 0; j < numGroup; j++) { createGroupForUser(reducedUserProfiles[curIdxInWindow], firstLevelFriends, secondLevelFriends); @@ -824,6 +834,15 @@ public void generateGroups(int pass, int cellPos, int numCell) { } } + /** + * Pushes a user into the generator. + * + * @param reduceUser The user to push. 
+ * @param pass The pass identifier, which is used to decide the criteria under which the edges are created. + * @param context The map-reduce context. + * @param isContext + * @param oos The output stream used to write the data. + */ public void pushUserProfile(ReducedUserProfile reduceUser, int pass, Reducer.Context context, boolean isContext, ObjectOutputStream oos){ @@ -845,13 +864,22 @@ public void pushUserProfile(ReducedUserProfile reduceUser, int pass, context, cellReducedUserProfiles, isContext, - oos); + oos ); numUserForNewCell = 0; } } reduceUser = null; } + /** + * Creates the remainder of the edges for the currently inserted nodes. + * + * @param reduceUser The user to push. + * @param pass The pass identifier, which is used to decide the criteria under which the edges are created. + * @param context The map-reduce context. + * @param isContext + * @param oos The output stream used to write the data. + */ public void pushAllRemainingUser(int pass, Reducer.Context context, boolean isContext, ObjectOutputStream oos){ @@ -861,6 +889,13 @@ public void pushAllRemainingUser(int pass, } } + /** + * Generates the users. + * + * @param pass The identifier of the current pass. + * @param context The map-reduce context. + * @param mapIdx The index of the current map, used to determine how many users to generate. + */ public void mrGenerateUserInfo(int pass, Context context, int mapIdx){ int numToGenerateUser = (numMaps == mapIdx) ?
numCellInLastFile : numCellPerfile; @@ -909,7 +944,7 @@ public void mr2InitFriendShipWindow(int pass, Reducer.Context context, boolean i if (!((reducedUserProfiles[j].getNumFriendsAdded() == reducedUserProfiles[j].getNumFriends(pass)) || reducedUserProfiles[i].isExistFriend(reducedUserProfiles[j].getAccountId()))) { - double randProb = randUniform.nextDouble(); + double randProb = randomUniform.nextDouble(); double prob = getFriendCreatePro(i, j, pass); if ((randProb < prob) || (randProb < limitProCorrelated)) { createFriendShip(reducedUserProfiles[i], reducedUserProfiles[j], (byte) pass); @@ -941,7 +976,7 @@ public void mr2SlideFriendShipWindow(int pass, int cellPos, Reducer.Context cont if ( !(reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() == reducedUserProfiles[checkFriendIdx].getNumFriends(pass) || reducedUserProfiles[curIdxInWindow].isExistFriend(reducedUserProfiles[checkFriendIdx].getAccountId()))) { - double randProb = randUniform.nextDouble(); + double randProb = randomUniform.nextDouble(); double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); if ((randProb < prob) || (randProb < limitProCorrelated)) { createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], (byte) pass); @@ -970,7 +1005,7 @@ public void mr2SlideLastCellsFriendShip(int pass, int cellPos, int numleftCell, if ( !(reducedUserProfiles[checkFriendIdx].getNumFriendsAdded() == reducedUserProfiles[checkFriendIdx].getNumFriends(pass) || reducedUserProfiles[curIdxInWindow].isExistFriend(reducedUserProfiles[checkFriendIdx].getAccountId()))) { - double randProb = randUniform.nextDouble(); + double randProb = randomUniform.nextDouble(); double prob = getFriendCreatePro(curIdxInWindow, checkFriendIdx, pass); if ((randProb < prob) || (randProb < limitProCorrelated)) { createFriendShip(reducedUserProfiles[curIdxInWindow], reducedUserProfiles[checkFriendIdx], @@ -991,7 +1026,7 @@ public void generatePosts(ReducedUserProfile user, UserExtraInfo 
extraInfo){ int numPosts = getNumOfPost(user); for (int m = 0; m < numPosts; m++) { Post post = tagTextDic.createPost(user, maxNumLikes, userAgentDic, ipAddDictionary, browserDic); - Integer languageIndex = randUniform.nextInt(extraInfo.getLanguages().size()); + Integer languageIndex = randomUniform.nextInt(extraInfo.getLanguages().size()); post.setLanguage(extraInfo.getLanguages().get(languageIndex)); String countryName = locationDic.getLocationName((ipAddDictionary.getLocation(post.getIpAddress()))); @@ -1015,7 +1050,7 @@ public void generatePosts(ReducedUserProfile user, UserExtraInfo extraInfo){ serializer.gatherData(post); // Generate comments - int numComment = randNumberComments.nextInt(maxNumComments); + int numComment = randomNumComments.nextInt(maxNumComments); long lastCommentCreatedDate = post.getCreatedDate(); long lastCommentId = -1; long startCommentId = TagTextDictionary.commentId; @@ -1037,18 +1072,18 @@ public void generatePosts(ReducedUserProfile user, UserExtraInfo extraInfo){ public void generatePhoto(ReducedUserProfile user, UserExtraInfo extraInfo){ // Generate photo Album and photos int numOfmonths = (int) dateTimeGenerator.numberOfMonths(user); - int numPhotoAlbums = randNumberPhotoAlbum.nextInt(maxNumPhotoAlbumsPerMonth); + int numPhotoAlbums = randomNumPhotoAlbums.nextInt(maxNumPhotoAlbumsPerMonth); if (numOfmonths != 0) { numPhotoAlbums = numOfmonths * numPhotoAlbums; } for (int m = 0; m < numPhotoAlbums; m++) { - Group album = groupGenerator.createAlbum(user, extraInfo, m, randMembership, joinProbs[0]); + Group album = groupGenerator.createAlbum(user, extraInfo, m, randomMembership, joinProbs[0]); serializer.gatherData(album); // Generate photos for this album - int numPhotos = randNumberPhotos.nextInt(maxNumPhotoPerAlbums); + int numPhotos = randomNumPhotos.nextInt(maxNumPhotoPerAlbums); for (int l = 0; l < numPhotos; l++) { Photo photo = photoGenerator.generatePhoto(user, album, l, maxNumLikes); @@ -1071,21 +1106,21 @@ public void 
createGroupForUser(ReducedUserProfile user, Group group = groupGenerator.createGroup(user); - HashSet memberIds = new HashSet(); + TreeSet memberIds = new TreeSet(); - int numGroupMember = randNumberUserPerGroup.nextInt(maxNumMemberGroup); + int numGroupMember = randomNumUsersPerGroup.nextInt(maxNumMemberGroup); group.initAllMemberships(numGroupMember); int numLoop = 0; while ((group.getNumMemberAdded() < numGroupMember) && (numLoop < windowSize)) { numLoop++; - randLevelProb = randFriendLevelSelect.nextDouble(); + randLevelProb = randomFriendLevelSelect.nextDouble(); // Select the appropriate friend level if (randLevelProb < levelProbs[0]) { // ==> level 1 // Find a friendIdx - int friendIdx = randMemberIdxSelector.nextInt(user.getNumFriendsAdded()); + int friendIdx = randomMemberIdxSelector.nextInt(user.getNumFriendsAdded()); // Note: Use user.getNumFriendsAdded(), do not use // firstLevelFriends.length // because we allocate a array for friendLists, but do not @@ -1094,7 +1129,7 @@ public void createGroupForUser(ReducedUserProfile user, int potentialMemberAcc = firstLevelFriends[friendIdx].getFriendAcc(); - randMemberProb = randMembership.nextDouble(); + randMemberProb = randomMembership.nextDouble(); if (randMemberProb < joinProbs[0]) { // Check whether this user has been added and then add to the group if (!memberIds.contains(potentialMemberAcc)) { @@ -1108,9 +1143,9 @@ public void createGroupForUser(ReducedUserProfile user, } } else if (randLevelProb < levelProbs[1]) { // ==> level 2 if (secondLevelFriends.size() != 0) { - int friendIdx = randMemberIdxSelector.nextInt(secondLevelFriends.size()); + int friendIdx = randomMemberIdxSelector.nextInt(secondLevelFriends.size()); int potentialMemberAcc = secondLevelFriends.get(friendIdx).getFriendAcc(); - randMemberProb = randMembership.nextDouble(); + randMemberProb = randomMembership.nextDouble(); if (randMemberProb < joinProbs[1]) { // Check whether this user has been added and then add to the group if 
(!memberIds.contains(potentialMemberAcc)) { @@ -1125,9 +1160,9 @@ public void createGroupForUser(ReducedUserProfile user, } } else { // ==> random users // Select a user from window - int friendIdx = randMemberIdxSelector.nextInt(windowSize); + int friendIdx = randomMemberIdxSelector.nextInt(windowSize); int potentialMemberAcc = reducedUserProfiles[friendIdx].getAccountId(); - randMemberProb = randMembership.nextDouble(); + randMemberProb = randomMembership.nextDouble(); if (randMemberProb < joinProbs[2]) { // Check whether this user has been added and then add to the group if (!memberIds.contains(potentialMemberAcc)) { @@ -1149,7 +1184,7 @@ public void generatePostForGroup(Group group) { int numberGroupPost = getNumOfGroupPost(group); for (int i = 0; i < numberGroupPost; i++) { Post groupPost = tagTextDic.createPost(group, maxNumLikes, userAgentDic, ipAddDictionary, browserDic); -// Integer languageIndex = randUniform.nextInt(extraInfo.getLanguages().size()); +// Integer languageIndex = randomUniform.nextInt(extraInfo.getLanguages().size()); // post.setLanguage(extraInfo.getLanguages().get(languageIndex)); groupPost.setLanguage(-1); String countryName = locationDic.getLocationName((ipAddDictionary.getLocation(groupPost.getIpAddress()))); @@ -1157,7 +1192,7 @@ public void generatePostForGroup(Group group) { serializer.gatherData(groupPost); - int numComment = randNumberComments.nextInt(maxNumComments); + int numComment = randomNumComments.nextInt(maxNumComments); long lastCommentCreatedDate = groupPost.getCreatedDate(); long lastCommentId = -1; long startCommentId = TagTextDictionary.commentId; @@ -1184,12 +1219,12 @@ public int getNumOfPost(ReducedUserProfile user) { int numOfmonths = (int) dateTimeGenerator.numberOfMonths(user); int numberPost; if (numOfmonths == 0) { - numberPost = randNumberPost.nextInt(maxNumPostPerMonth); + numberPost = randomNumPosts.nextInt(maxNumPostPerMonth); } else { - numberPost = randNumberPost.nextInt(maxNumPostPerMonth * 
numOfmonths); + numberPost = randomNumPosts.nextInt(maxNumPostPerMonth * numOfmonths); } - numberPost = (numberPost * user.getNumFriendsAdded()) / maxNoFriends; + numberPost = (numberPost * user.getNumFriendsAdded()) / maxNumFriends; return numberPost; } @@ -1200,9 +1235,9 @@ public int getNumOfGroupPost(Group group) { int numberPost; if (numOfmonths == 0) { - numberPost = randNumberGroupPost.nextInt(maxNumGroupPostPerMonth); + numberPost = randomNumGroupPosts.nextInt(maxNumGroupPostPerMonth); } else { - numberPost = randNumberGroupPost.nextInt(maxNumGroupPostPerMonth * numOfmonths); + numberPost = randomNumGroupPosts.nextInt(maxNumGroupPostPerMonth * numOfmonths); } numberPost = (numberPost * group.getNumMemberAdded()) / maxNumMemberGroup; @@ -1223,7 +1258,7 @@ public UserProfile generateGeneralInformation(int accountId) { userProf.setCreatedDate(dateTimeGenerator.randomDateInMillis()); - userProf.setNumFriends((short) randPowerlaw.getValue()); + userProf.setNumFriends((short) randomPowerLaw.getValue()); userProf.allocateFriendListMemory(NUM_FRIENDSHIP_HADOOP_JOBS); short totalFriendSet = 0; @@ -1246,7 +1281,7 @@ public UserProfile generateGeneralInformation(int accountId) { userProf.setMainTagId(userMainTag); - userProf.setNumTags((short) randTagPowerlaw.getValue()); + userProf.setNumTags((short) randomTagPowerLaw.getValue()); userProf.setSetOfTags(topicTagDic.getSetofTags(userMainTag, userProf.getNumTags())); @@ -1254,14 +1289,14 @@ public UserProfile generateGeneralInformation(int accountId) { userProf.setBirthDay(dateTimeGenerator.getBirthDay(userProf.getCreatedDate())); - byte gender = (randGender.nextDouble() > 0.5) ? (byte)1 : (byte)0; + byte gender = (randomGender.nextDouble() > 0.5) ? 
(byte)1 : (byte)0; userProf.setGender(gender); userProf.setForumWallId(accountId * 2); // Each user has an wall userProf.setForumStatusId(accountId * 2 + 1); // User's Agent - userProf.setHaveSmartPhone(randUserAgent.nextDouble() > probHavingSmartPhone); + userProf.setHaveSmartPhone(randomUserAgent.nextDouble() > probHavingSmartPhone); if (userProf.isHaveSmartPhone()) { userProf.setAgentIdx(userAgentDic.getRandomUserAgentIdx()); } @@ -1274,7 +1309,7 @@ public UserProfile generateGeneralInformation(int accountId) { .getRandomIPFromLocation(userProf.getLocationIdx())); // Popular places - byte numPopularPlaces = (byte) randNumPopularPlaces.nextInt(maxNumPopularPlaces + 1); + byte numPopularPlaces = (byte) randomNumPopularPlaces.nextInt(maxNumPopularPlaces + 1); userProf.setNumPopularPlace(numPopularPlaces); short popularPlaces[] = new short[numPopularPlaces]; for (int i = 0; i < numPopularPlaces; i++){ @@ -1287,7 +1322,7 @@ public UserProfile generateGeneralInformation(int accountId) { userProf.setPopularPlaceIds(popularPlaces); // Get random Idx - userProf.setRandomIdx(randUserRandomIdx.nextInt(USER_RANDOM_ID_LIMIT)); + userProf.setRandomIdx(randomUserRandomIdx.nextInt(USER_RANDOM_ID_LIMIT)); return userProf; } @@ -1437,7 +1472,7 @@ public void setInfoFromUserProfile(ReducedUserProfile user, // NOTE: [2013-08-06] The tags of posts, forums, etc.. all depend of the user ones // if in the future this fact change add those in the statistics also. 
- HashSet tags = user.getSetOfTags(); + TreeSet tags = user.getSetOfTags(); for (Integer tagID : tags) { stats.tagNames.add(mainTagDic.getName(tagID)); Integer parent = mainTagDic.getTagClass(tagID); @@ -1457,18 +1492,18 @@ public double getFriendCreatePro(int i, int j, int pass){ prob = Math.pow(baseProbCorrelated, (j + windowSize - i)); } return prob; - } + public void createFriendShip(ReducedUserProfile user1, ReducedUserProfile user2, byte pass) { long requestedTime = dateTimeGenerator.randomFriendRequestedDate(user1, user2); - byte initiator = (byte) randInitiator.nextInt(2); + byte initiator = (byte) randomInitiator.nextInt(2); long createdTime = -1; long declinedTime = -1; - if (randFriendReject.nextDouble() > friendRejectRatio) { + if (randomFriendReject.nextDouble() > friendRejectRatio) { createdTime = dateTimeGenerator.randomFriendApprovedDate(requestedTime); } else { declinedTime = dateTimeGenerator.randomFriendDeclinedDate(requestedTime); - if (randFriendReapprov.nextDouble() < friendReApproveRatio) { + if (randomFriendAproval.nextDouble() < friendReApproveRatio) { createdTime = dateTimeGenerator.randomFriendReapprovedDate(declinedTime); } } @@ -1478,7 +1513,7 @@ public void createFriendShip(ReducedUserProfile user1, ReducedUserProfile user2, user1.addNewFriend(new Friend(user2, requestedTime, declinedTime, createdTime, pass, initiator)); - friendshipNo++; + friendshipNum++; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java index 3970c615d..bd5beaf12 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Photo.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.objects; -import java.util.HashSet; +import java.util.TreeSet; public class Photo { long photoId; @@ -48,7 +48,7 @@ public class Photo { double latt; double longt; long 
takenTime; - HashSet tags; + TreeSet tags; int[] interestedUserAccs; long[] interestedUserAccsTimestamp; @@ -114,10 +114,10 @@ public long getTakenTime() { public void setTakenTime(long takenTime) { this.takenTime = takenTime; } - public HashSet getTags() { + public TreeSet getTags() { return tags; } - public void setTags(HashSet tags) { + public void setTags(TreeSet tags) { this.tags = tags; } public double getLatt() { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java index f67dc7e3a..13de72b86 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/Post.java @@ -36,7 +36,7 @@ */ package ldbc.socialnet.dbgen.objects; -import java.util.HashSet; +import java.util.TreeSet; public class Post { long postId; @@ -50,7 +50,7 @@ public class Post { int articleIdx; // Index of articles in the set of same region/interest article int interestIdx; // Index of one interest in user's interests - HashSet tags; + TreeSet tags; int interestedUserAccs[]; //List of users who are interested in the post long interestedUserAccsTimestamp[]; @@ -73,10 +73,10 @@ public void setInterestIdx(int interestIdx) { this.interestIdx = interestIdx; } - public HashSet getTags() { + public TreeSet getTags() { return tags; } - public void setTags(HashSet tags) { + public void setTags(TreeSet tags) { this.tags = tags; } public int[] getInterestedUserAccs() { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java index 96832873f..179df4d90 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/ReducedUserProfile.java @@ -42,7 +42,7 @@ import 
java.io.Serializable; import java.util.Arrays; import java.util.GregorianCalendar; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Iterator; import org.apache.hadoop.io.Writable; @@ -61,7 +61,7 @@ public class ReducedUserProfile implements Serializable, Writable{ Friend friendList[]; - HashSet friendIds; // Use a hashset for checking the existence + TreeSet friendIds; // Use a Treeset for checking the existence int dicElementIds[]; // Id of an element in a dictionary, e.g., locationId @@ -82,7 +82,7 @@ public class ReducedUserProfile implements Serializable, Writable{ int cityIdx; int forumWallId; int forumStatusId; - HashSet setOfTags; + TreeSet setOfTags; short popularPlaceIds[]; byte numPopularPlace; @@ -122,13 +122,13 @@ private void readObject(java.io.ObjectInputStream stream) numPassFriendsAdded[i] = stream.readShort(); } friendList = new Friend[numFriends]; - friendIds = new HashSet(numFriends); + friendIds = new TreeSet(); for (int i = 0; i < numFriendsAdded; i++){ Friend fr = new Friend(); fr.readFields(stream); friendList[i] = fr; } - //Read the size of hashset first + //Read the size of Treeset first int size = stream.readInt(); for (int i = 0; i < size; i++){ friendIds.add(stream.readInt()); @@ -153,7 +153,7 @@ private void readObject(java.io.ObjectInputStream stream) forumStatusId = stream.readInt(); byte numOfTags = stream.readByte(); - setOfTags = new HashSet(numOfTags); + setOfTags = new TreeSet(); for (byte i = 0; i < numOfTags;i++){ setOfTags.add(stream.readInt()); } @@ -186,7 +186,7 @@ private void writeObject(java.io.ObjectOutputStream stream) for (int i = 0; i < numFriendsAdded; i++){ friendList[i].write(stream); } - //Read the size of hashset first + //Write the size of the TreeSet first stream.writeInt(friendIds.size()); Iterator it = friendIds.iterator(); while (it.hasNext()){ @@ -243,13 +243,13 @@ public void readFields(DataInput arg0) throws IOException { numPassFriendsAdded[i] = arg0.readShort(); } friendList = new
Friend[numFriends]; - friendIds = new HashSet(numFriends); + friendIds = new TreeSet(); for (int i = 0; i < numFriendsAdded; i++){ Friend fr = new Friend(); fr.readFields(arg0); friendList[i] = fr; } - //Read the size of hashset first + //Read the size of Treeset first int size = arg0.readInt(); for (int i = 0; i < size; i++){ friendIds.add(arg0.readInt()); @@ -274,7 +274,7 @@ public void readFields(DataInput arg0) throws IOException { forumStatusId = arg0.readInt(); byte numTags = arg0.readByte(); - setOfTags = new HashSet(numTags); + setOfTags = new TreeSet(); for (byte i = 0; i < numTags;i++){ setOfTags.add(arg0.readInt()); } @@ -345,7 +345,7 @@ public void write(DataOutput arg0) throws IOException { for (int i = 0; i < numFriendsAdded; i++){ friendList[i].write(arg0); } - //Read the size of hashset first + //Write the size of the TreeSet first arg0.writeInt(friendIds.size()); Iterator it = friendIds.iterator(); while (it.hasNext()){ @@ -496,7 +496,7 @@ public void setNumFriendsAdded(short numFriendsAdded) { public void allocateFriendListMemory(){ friendList = new Friend[numFriends]; - friendIds = new HashSet(numFriends); + friendIds = new TreeSet(); } public Friend[] getFriendList() { @@ -597,10 +597,10 @@ public int getForumStatusId() { public void setForumStatusId(int forumStatusId) { this.forumStatusId = forumStatusId; } - public HashSet getSetOfTags() { + public TreeSet getSetOfTags() { return setOfTags; } - public void setSetOfTags(HashSet setOfTags) { + public void setSetOfTags(TreeSet setOfTags) { this.setOfTags = setOfTags; } public byte getNumPopularPlace() { @@ -635,7 +635,7 @@ public short[] getNumPassFriendsAdded() { public void setNumPassFriendsAdded(short[] numPassFriendsAdded) { this.numPassFriendsAdded = numPassFriendsAdded; } - public HashSet getFriendIds() { + public TreeSet getFriendIds() { return friendIds; } public int[] getDicElementIds() { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java
b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java index b5ecfc4ca..9fe87a085 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserExtraInfo.java @@ -37,14 +37,14 @@ package ldbc.socialnet.dbgen.objects; import java.util.HashMap; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Set; import java.util.Vector; public class UserExtraInfo { String gender; //long dateOfBirth; - HashSet email; + TreeSet email; Vector languages; int nativeLanguage; //String sourceIp; // Source IP address @@ -65,7 +65,7 @@ public class UserExtraInfo { public UserExtraInfo() { - email = new HashSet(); + email = new TreeSet(); companies = new HashMap(); } @@ -90,7 +90,7 @@ public String getGender() { public void setGender(String gender) { this.gender = gender; } - public HashSet getEmail() { + public TreeSet getEmail() { return email; } public void addEmail(String email) { diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java index 896055381..54d48ee9d 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/objects/UserProfile.java @@ -37,7 +37,7 @@ package ldbc.socialnet.dbgen.objects; import java.io.Serializable; -import java.util.HashSet; +import java.util.TreeSet; import java.util.Iterator; public class UserProfile implements Serializable { @@ -65,9 +65,9 @@ public class UserProfile implements Serializable { public short numFriendsAdded; Friend friendList[]; - HashSet friendIds; // Use a hashset for checking the existence + TreeSet friendIds; // Use a Treeset for checking the existence - HashSet setOfTags; + TreeSet setOfTags; int mainTagId; //For user's agent information @@ -95,7 +95,7 @@ public 
UserProfile(int accountId) { forumWallId = -1; forumStatusId = -1; - setOfTags = new HashSet(); + setOfTags = new TreeSet(); } public byte getGender() { @@ -189,7 +189,7 @@ public short getNumPassFriends(int pass) { public void setNumPassFriends(short numPassFriends, int pass) { this.numPassFriends[pass] = numPassFriends; } - public HashSet getSetOfTags() { + public TreeSet getSetOfTags() { return setOfTags; } public int getFirstTagIdx(){ @@ -199,13 +199,13 @@ public int getFirstTagIdx(){ return tagIdx; } - public void setSetOfTags(HashSet setOfTags) { + public void setSetOfTags(TreeSet setOfTags) { this.setOfTags = setOfTags; } public void allocateFriendListMemory(int numFriendPasses){ friendList = new Friend[numFriends]; - friendIds = new HashSet(numFriends); + friendIds = new TreeSet(); numPassFriends = new short[numFriendPasses]; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java index 46c39edb5..f1933fc02 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/serializer/Statistics.java @@ -3,7 +3,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; +import java.util.TreeSet; import ldbc.socialnet.dbgen.dictionary.LocationDictionary; @@ -53,19 +53,19 @@ public int compareTo(CountryPair pair) { public String maxWorkFrom; public String minPostCreationDate; public String maxPostCreationDate; - public HashSet firstNames; - public HashSet tagNames; - public HashSet countries; - public HashSet tagClasses; + public TreeSet firstNames; + public TreeSet tagNames; + public TreeSet countries; + public TreeSet tagClasses; private ArrayList countryPairs; public Statistics() { minPersonId = Integer.MAX_VALUE; maxPersonId = Integer.MIN_VALUE; - firstNames = new HashSet(); - tagNames 
= new HashSet(); - tagClasses = new HashSet(); - countries = new HashSet(); + firstNames = new TreeSet(); + tagNames = new TreeSet(); + tagClasses = new TreeSet(); + countries = new TreeSet(); countryPairs = new ArrayList(); } From d58972c696e96816d30f2c1b15e211de90f23746 Mon Sep 17 00:00:00 2001 From: Arnau Date: Wed, 8 Jan 2014 10:14:26 +0100 Subject: [PATCH 08/10] corretgit bug int->Integer --- .../main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java index 053a63477..935833363 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java @@ -123,7 +123,7 @@ public void generateAllGPSForAllEvents(StreamStoreManager storeMng){ for (int i = 0; i < eventSet.size(); i++){ int numAttendedUsers = randNumUser.nextInt(100) + 20; //int lastUserId = 0; - TreeSet attendedUsers = new TreeSet(numAttendedUsers); + TreeSet attendedUsers = new TreeSet(Integer(numAttendedUsers)); while (attendedUsers.size() < numAttendedUsers){ //int step = randNumUser.nextInt(totalNumUsers - numAttendedUsers- lastUserId + j); //lastUserId = lastUserId + step + j; From 83b79d056df0b0b704a78b1ca8a2fad7ff7c9fd6 Mon Sep 17 00:00:00 2001 From: Arnau Date: Wed, 8 Jan 2014 10:50:27 +0100 Subject: [PATCH 09/10] fixed GPSGenerator.java --- .../java/ldbc/socialnet/dbgen/generator/GPSGenerator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java index 935833363..df8acb5b9 100644 --- 
a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/GPSGenerator.java @@ -40,6 +40,7 @@ import java.util.Iterator; import java.util.Random; import java.util.Vector; +import java.lang.Integer; import ldbc.socialnet.dbgen.objects.Event; import ldbc.socialnet.dbgen.objects.GPS; @@ -123,7 +124,8 @@ public void generateAllGPSForAllEvents(StreamStoreManager storeMng){ for (int i = 0; i < eventSet.size(); i++){ int numAttendedUsers = randNumUser.nextInt(100) + 20; //int lastUserId = 0; - TreeSet attendedUsers = new TreeSet(Integer(numAttendedUsers)); +// TreeSet attendedUsers = new TreeSet(new Integer(numAttendedUsers)); + TreeSet attendedUsers = new TreeSet(); while (attendedUsers.size() < numAttendedUsers){ //int step = randNumUser.nextInt(totalNumUsers - numAttendedUsers- lastUserId + j); //lastUserId = lastUserId + step + j; From c8b8800e6bf8c5b297a6648cde7a3843fd6f7565 Mon Sep 17 00:00:00 2001 From: Arnau Date: Thu, 23 Jan 2014 18:33:14 +0100 Subject: [PATCH 10/10] Added dictionary citiesByCountry. Fixed som minor bugs in order to improve robustness. 
--- .../dbgen/dictionary/LocationDictionary.java | 13 +- .../dictionary/OrganizationsDictionary.java | 12 +- .../dbgen/generator/ScalableGenerator.java | 3 +- .../dictionaries/citiesByCountry.txt | 1367 +++++++++++++++++ .../resources/dictionaries/dicLocations.txt | 111 ++ 5 files changed, 1495 insertions(+), 11 deletions(-) create mode 100644 ldbc_socialnet_dbgen/src/main/resources/dictionaries/citiesByCountry.txt create mode 100644 ldbc_socialnet_dbgen/src/main/resources/dictionaries/dicLocations.txt diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java index b942e76ff..6f1135cc9 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/LocationDictionary.java @@ -56,7 +56,7 @@ public class LocationDictionary { public static final int INVALID_LOCATION = -1; private static final String SEPARATOR = " "; - private static final String SEPARATOR_CITY = " "; + private static final String SEPARATOR_CITY = " "; int numUsers; int curLocationIdx; @@ -195,7 +195,9 @@ public int getRandomCity(int countryId) { } if (citiesFromCountry.get(countryId).size() == 0) { - System.err.println("Country with no known cities"); + Location location = locations.get(countryId); + String countryName = location.getName(); + System.err.println("Country with no known cities: "+countryName); return INVALID_LOCATION; } @@ -235,12 +237,13 @@ private void readCities() { String line; while ((line = dictionary.readLine()) != null){ String data[] = line.split(SEPARATOR_CITY); +// System.err.println(data[0]); if (countryNames.containsKey(data[0])) { Integer countryId = countryNames.get(data[0]); - if (!cityNames.containsKey(data[2])) { + if (!cityNames.containsKey(data[1])) { Location location = new Location(); location.setId(locations.size()); 
- location.setName(data[2]); + location.setName(data[1]); location.setLatt(locations.get(countryId).getLatt()); location.setLongt(locations.get(countryId).getLongt()); location.setPopulation(-1); @@ -250,7 +253,7 @@ private void readCities() { isPartOf.put(location.getId(), countryId); citiesFromCountry.get(countryId).add(location.getId()); - cityNames.put(data[2], location.getId()); + cityNames.put(data[1], location.getId()); cities++; } diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java index ff724f323..44e615e37 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/dictionary/OrganizationsDictionary.java @@ -102,7 +102,8 @@ public void extractOrganizationNames() { String data[] = line.split(SEPARATOR); String locationName = data[0]; if (locationName.compareTo(lastLocationName) != 0) { - if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION) { + if (locationDic.getCountryId(locationName) != LocationDictionary.INVALID_LOCATION && + locationDic.getCityId(data[2]) != LocationDictionary.INVALID_LOCATION ) { lastLocationName = locationName; curLocationId = locationDic.getCountryId(locationName); String organizationName = data[1].trim(); @@ -110,8 +111,10 @@ public void extractOrganizationNames() { Integer cityId = locationDic.getCityId(data[2]); organizationToLocation.put(organizationName, cityId); totalNumOrganizations++; - } - } else{ + } /*else { + System.err.println("ERROR:Invalid country or city of organization: "+locationName+" "+data[2]); + }*/ + } else if( locationDic.getCityId(data[2]) != LocationDictionary.INVALID_LOCATION ) { String organizationName = data[1].trim(); organizationsByLocations.get(curLocationId).add(organizationName); Integer cityId = 
locationDic.getCityId(data[2]); @@ -128,9 +131,8 @@ public void extractOrganizationNames() { // 90% of people go to top-10 universities // 10% go to remaining universities - public int getRandomOrganization(int countryId) { + public int getRandomOrganization(int locationId) { - int locationId = countryId; double prob = randUnRelatedOrganization.nextDouble(); Vector countries = locationDic.getCountries(); diff --git a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java index 46c85fd22..e544d0834 100644 --- a/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java +++ b/ldbc_socialnet_dbgen/src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java @@ -164,7 +164,8 @@ public enum OrganisationType { private static final String emailDicFile = DICTIONARY_DIRECTORY + "email.txt"; private static final String givennamesDicFile = DICTIONARY_DIRECTORY + "givennameByCountryBirthPlace.txt.freq.full"; private static final String organizationsDicFile = DICTIONARY_DIRECTORY + "institutesCityByCountry.txt"; - private static final String cityDicFile = DICTIONARY_DIRECTORY + "institutesCityByCountry.txt"; +// private static final String cityDicFile = DICTIONARY_DIRECTORY + "institutesCityByCountry.txt"; + private static final String cityDicFile = DICTIONARY_DIRECTORY + "citiesByCountry.txt"; private static final String languageDicFile = DICTIONARY_DIRECTORY + "languagesByCountry.txt"; private static final String popularPlacesDicFile = DICTIONARY_DIRECTORY + "popularPlacesByCountry.txt"; private static final String agentFile = DICTIONARY_DIRECTORY + "smartPhonesProviders.txt"; diff --git a/ldbc_socialnet_dbgen/src/main/resources/dictionaries/citiesByCountry.txt b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/citiesByCountry.txt new file mode 100644 index 000000000..08b695889 --- /dev/null +++ 
b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/citiesByCountry.txt @@ -0,0 +1,1367 @@ +India Pondicherry +India Rewari +India Mysore +India Thane +India Shibpur +India Rohtak +India Jiaganj_Azimganj +India Andhra_Pradesh +India Shillong +India Baharampur +India Bhopal +India Tezpur +India Rourkela +India Chennai +India Noida +India Jammu +India Kalyani +India Delhi +India Gobichettipalayam +India Kochi +India Srinagar +India Ahmedabad +India Pune +India Haldia +India Nagpur +India Kakinada +India Berhampur +India Darbhanga +India Thirthahalli +India Bikaner +India Vellore +India Surat +India Anantapur +India Cooch_Behar +India Hubli +India Hamirpur +India Kanpur +India India +India Tiruchirappalli +India Ujjain +India Visakhapatnam +India Gandhinagar +India Kannur +India Agra +India Loni +India Jind +India Durgapur +India Sangrur +India Raichur +India Bardhaman +India Moga +India Amritsar +India Srikakulam +India Anantapur_district +India Kolkata +India Orissa +India Kalady +India Coimbatore +India Gurgaon +India Purulia +India Kodambakkam +India Jodhpur +India Mahishadal +India Kurukshetra +India Meerut +India Faizabad +India Bangalore +India Kota +India Lucknow +India Kalimpong +India Cuttack +India Alappuzha +India Davangere +India Siliguri +India Bidar +India Puttur +India Maharashtra +India Kolhapur +India Habra +India Bihar +India Hyderabad +India Kadapa +India Bhilai +India Tirunelveli +India Moradabad +India Tiruvallur_district +India Nashik +India Anand +India Sion +India Aizawl +India Jhansi +India Serampore +India Asansol +India Varanasi +India Navi_Mumbai +India Vadodara +India Patna +India Jajapur +India Bareilly +India Tamil_Nadu +India Ranchi +India Gulbarga +India Erode +India Arambag +India Howrah +India Jaipur +India Kasaragod +India Vallabh_Vidhyanagar +India Suri +India Assam +India Greater_Noida +India Jorhat +India Allahabad +India Rajasthan +India Aurangabad +India Hisar +India Haldwani +India Dum_Dum +India Vijayawada +India Solapur 
+India Malappuram +India Thanjavur +India Dharwad +India Nalgonda +India English_Bazar +India Bankura +India Belgaum +India Namakkal +India Balasore +India Palayamkottai +India Chetpet +India Mangalore +India Nellore +India Patiala +India Madurai +India Karaikudi +India Karnataka +India Bahadurgarh +India Silchar +India Kandivali +India Ghaziabad +India Rajahmundry +India Dehradun +India Kozhikode +India Salem +India Barasat +India Guwahati +India Shimoga +India Kollam +India Krishnanagar +India Ernakulam +India Mumbai +India Talcher +India Warangal +India Goa +India Ajmer +India Raipur +India Kerala +India Jalpaiguri +India Brahmavar +India Manipal +India Udaipur +India Tiruvallur +India Dhenkanal +India Thrissur +India Bandra +India Chandigarh +India Munsirhat +India Tirupati +India Midnapore +India Poonamallee +India Nalanda +India Jabalpur +India Faridabad +India Rupnagar +India Guntur +India Kottayam +India Bhubaneswar +India Fatehgarh_Sahib +India Barrackpore +India Gaya +India Kashipur +India Gangtok +India Nerul +India Bilaspur +India Thiruchendur +India Gwalior +India Gujarat +India Indore +India Palakkad +India New_Delhi +India Secunderabad +India Durg +India Raniganj +India Thiruvananthapuram +India Jamshedpur +India Bhagalpur +India Aligarh +India Sambalpur +India Latur +China Zhengzhou +China Chishui +China Donggang +China Dandong +China Chaohu +China Changji +China Bayannur +China Zhangzhou +China Huanggang +China Genhe +China Baicheng +China Shenzhen +China Deyang +China Baiyin +China Bole +China Delhi +China Xi'an +China Ezhou +China Baoshan +China Baotou +China Danyang +China Changzhi +China Yueyang +China Diaobingshan +China Fuyang +China Dingxi +China Dunhuang +China Nanning +China Altay +China Fuqing +China Bei'an +China Anguo +China Beihai +China Daye +China Xinzheng +China Harbin +China Anshan +China Duyun +China Dongtai +China Xinxiang +China Macau +China Da'an +China Shenyang +China Dengta +China Bijie +China Aral +China Fujin +China Chuzhou 
+China Cenxi +China Songjiang_District +China Benxi +China Datong +China Changde +China Bazhong +China Guilin +China Mbabane +China Gaocheng +China Anshun +China Baise +China Dujiangyan +China Fangchenggang +China Erenhot +China Baoding +China Ergun +China Dunhua +China Gaoping +China Guangzhou +China Hefei +China Changle +China Luoyang +China Beipiao +China Dezhou +China Chongzuo +China Baoji +China Chenzhou +China Pudong +China Shijiazhuang +China Hangzhou +China Fuzhou +China Changzhou +China Taiyuan +China Foshan +China Ankang +China Gejiu +China Dongyang +China Anning +China Dexing +China Zhuhai +China Gaomi +China Ürümqi +China Beiliu +China Fuquan +China Chaozhou +China Conghua +China Dangyang +China Fukang +China Chaoyang +China Fuxin +China Changyi +China Chongzhou +China Enping +China Dehui +China Quanzhou +China Emeishan +China Anqiu +China Beitun +China Changning +China Gaizhou +China Dongguan +China Lanzhou +China Beibei_District +China Shantou +China Dazhou +China Feicheng +China Yantai +China Gaoyao +China Dali +China Kunming +China Fu'an +China Brooklyn +China Botou +China Danzhou +China Bozhou +China Danjiangkou +China Nanchang +China Baishan +China Gaozhou +China Anda +China Arxan +China Changge +China Changshu +China Nan'an_District +China Nanchong +China Putian +China Dafeng +China Dongfang +China Chizhou +China Beijing +China Haidian_District +China Cixi +China Xiangtan +China Dongying +China Fenyang +China Hengyang +China Anlu +China Shihezi +China Golmud +China Fuyu +China Gaobeidian +China Tianjin +China Shanghai +China Beizhen +China Cangzhou +China Fuzhou, +China Fengcheng, +China Chibi +China Fengzhen +China Chuxiong +China Qiqihar +China Hong_Kong +China Alashankou +China Huainan +China Ma'anshan +China Changchun +China Wuhan +China Gaoyou +China Yinchuan +China Fushun +China Dongxing +China Chifeng +China Qingdao +China Chongqing +China Fuding +China Dengzhou +China Gongyi +China Dengfeng +China Daqing +China Changsha +China Anqing 
+China Bazhou +China Dingzhou +China Suzhou +China Fuling_District +China Anyang +China Dalian +China Enshi +China Chengdu +China Binzhou +China Fuyang, +China Jinan +China Nanjing +China Bengbu +China Ganzhou +China Chengde +China Kowloon +China Fenghua +China Gao'an +China Artux +China Dashiqiao +China Shanxi +Angola Luanda +Austria Vienna +Azerbaijan Baku +Bolivia Carmen_Pampa +Bosnia_and_Herzegovina Sarajevo +Bulgaria Sofia +Chad Chad +Croatia Zagreb +Denmark Copenhagen +Dominican_Republic Santo_Domingo +El_Salvador San_Salvador +Estonia Tallinn +Finland Helsinki +Honduras Tegucigalpa +Hong_Kong Ma_Liu_Shui +Israel Jerusalem +Jordan Amman +Laos Vientiane +Latvia Riga +Liberia Monrovia +Libya Benghazi +Lithuania Vilnius +Malta San_Gwann +Mauritania Nouakchott +Mauritius Moka +Mongolia Ulan_Bator +Namibia Windhoek +New_Zealand Auckland +Nicaragua Managua +Northern_Ireland Belfast +Norway Oslo +Oman Muscat +Panama Panama_City +Papua_New_Guinea Port_Moresby +Puerto_Rico Ponce +Republic_of_Ireland Dublin +Republic_of_Macedonia Skopje +Rwanda Butare +Scotland Glasgow +Singapore Singapore +Slovakia Bratislava +Slovenia Ljubljana +Swaziland Kwaluseni +Switzerland Geneva +Tajikistan Dushanbe +Uruguay Montevideo +Wales Swansea +Brazil Guanhães +Brazil Santos +Brazil São_Luís +Brazil Novo_Hamburgo +Brazil Rio_de_Janeiro +Brazil Petrópolis +Brazil Passos +Brazil Curitiba +Brazil Poços_de_Caldas +Brazil Santa_Maria +Brazil Goiânia +Brazil Londrina +Brazil Diamantina +Brazil Feira_de_Santana +Brazil João_Monlevade +Brazil Barbacena +Brazil Manaus +Brazil Campinas +Brazil São_Carlos +Brazil Carangola +Brazil Bagé +Brazil Lorena +Brazil Campos_dos_Goytacazes +Brazil Salvador +Brazil Ribeirão_Preto +Brazil Lavras +Brazil Criciúma +Brazil São_Paulo +Brazil Recife +Brazil Porto_Alegre +Brazil Uberlândia +Brazil Belo_Horizonte +Brazil Serro +Brazil Brasília +Brazil Itajubá +Brazil Lagoa_da_Prata +Brazil Campo_Grande +Brazil Niterói +Brazil Pelotas +Brazil Contagem +Germany 
Wernigerode +Germany Greifswald +Germany Darmstadt +Germany Friedberg +Germany Wedel +Germany Möckern +Germany Ingolstadt +Germany Heidelberg +Germany Speyer +Germany Lörrach +Germany Ludwigsburg +Germany Fulda +Germany Augsburg +Germany Berlin +Germany Munich +Germany Kempten +Germany Koblenz +Germany Trier +Germany Othmarschen +Germany Freiburg_im_Breisgau +Germany Hamburg +Germany Hanover +Germany Nuremberg +Germany Reutlingen +Germany Ilmenau +Germany Stuttgart +Germany Aachen +Germany Frankfurt +Germany Flensburg +Germany Mannheim +Germany Mainz +Germany Iserlohn +Germany Leipzig +Germany Cologne +Germany Heilbronn +Germany Konstanz +Germany Wiesbaden +Germany Lübeck +Germany Kassel +Germany Weimar +Indonesia Gorontalo +Indonesia West_Java +Indonesia Surabaya +Indonesia Depok +Indonesia Bandar_Lampung +Indonesia Tangerang +Indonesia Surakarta +Indonesia Makassar +Indonesia Yogyakarta +Indonesia Bangkalan +Indonesia Ciputat +Indonesia Manokwari_Regency +Indonesia Purwokerto +Indonesia Samarinda +Indonesia Serang +Indonesia Bali +Indonesia Padang +Indonesia Semarang +Indonesia Manado +Indonesia Central_Jakarta +Indonesia Medan +Indonesia Bengkulu +Indonesia Manokwari +Indonesia Jakarta +Indonesia Indonesia +Indonesia Tarakan +Indonesia Bandung +Indonesia Banda_Aceh +Indonesia Banjarmasin +Indonesia Jambi_City +Indonesia Pontianak +Indonesia Mataram +Indonesia Jimbaran +Indonesia Ambon +Indonesia Badung_Regency +Indonesia Kupang +Indonesia Magelang +Indonesia West_Papua +Indonesia South_Tangerang +Indonesia Tegal_Regency +Japan Nagoya +Japan Yokosuka +Japan Chiba +Japan Chiyoda +Japan Shibuya +Japan Nara +Japan Hachiōji +Japan Sagamihara +Japan Ichikawa +Japan Kyoto +Japan Gifu +Japan Toyoake +Japan Fujisawa +Japan Okazaki +Japan Hirosaki +Japan Nagakute +Japan Akita_City +Japan Nagasaki +Japan Shimonoseki +Japan Aomori +Japan Tokyo +Japan Toyohashi +Japan Toyota +Japan Owariasahi +Japan Bunkyo +Japan Hamamatsu +Japan Nisshin +Japan Niigata +Japan Machida +Japan 
Osaka +Japan Aichi_Prefecture +Japan Sapporo +Japan Setagaya +Japan Hachinohe +Japan Yokohama +Japan Atsugi +Japan Kobe +Japan Kagoshima +Japan Tokushima +Japan Chikusa-ku +Mexico Monterrey +Mexico Mexicali +Mexico Chihuahua +Mexico Zacapoaxtla +Mexico Culiacán +Mexico San_Nicolás_de_los_Garza +Mexico Mbabane +Mexico Guadalajara +Mexico Villahermosa +Mexico Antón_Lizardo +Mexico Ciudad_Obregón +Mexico Tlalpan +Mexico Saltillo +Mexico San_Pedro_Garza_García +Mexico Montemorelos +Mexico Pedro_Escobedo +Mexico Ensenada +Mexico Mexico_City +Mexico Querétaro +Mexico Puebla +Mexico Toluca +Mexico Oaxaca +Mexico Jiquilpan +Mexico Irapuato +Mexico Tlatelolco +Mexico Ixtlán_de_Juárez +Mexico Altamira +Mexico Mérida +Mexico Tijuana +Mexico Santa_Catarina +Mexico Colima +Mexico Campeche +Mexico San_Luis_Potosí +Mexico Cancún +Mexico Coyoacán +Mexico Tepic +Mexico Hermosillo +Mexico Chapingo +Mexico Morelia +Mexico Nuevo_Laredo +Pakistan Sukkur +Pakistan Khuzdar +Pakistan Larkana_District +Pakistan Jhang +Pakistan Nawabshah +Pakistan Swat +Pakistan http://maps.google.com/maps?ll=25.530889,69.017306&spn=0.01,0.01&t=m&q=25.530889,69.017306 +Pakistan Sialkot +Pakistan Ahmedpur_Sial +Pakistan Lahore +Pakistan Kahuta +Pakistan Shorkot +Pakistan Mirpur +Pakistan Swabi +Pakistan Taxila +Pakistan Kharian +Pakistan Quetta +Pakistan Faisalabad +Pakistan Topi +Pakistan Karachi +Pakistan Tando_Jam +Pakistan Surat,India +Pakistan New_Muslim_Town +Pakistan Multan +Pakistan Islamabad +Pakistan Risalpur +Pakistan Islamabad/Rawalpindi,Lahore +Pakistan Major_Cities +Pakistan Bahria_Town +Pakistan Peshawar +Pakistan Dera_Ismail_Khan +Pakistan Abbottabad +Pakistan Jamshoro +Pakistan Rawalpindi +Pakistan Wah_Cantonment +Pakistan Khyber_Pakhtunkhwa +Pakistan Sahiwal_Division +Pakistan Nilore +Pakistan Rahim_Yar_Khan +Pakistan Khairpur +Philippines Iloilo_City +Philippines Cabuyao +Philippines Kalibo +Philippines Davao_City +Philippines Iligan +Philippines Mandaluyong +Philippines Tuguegarao 
+Philippines Manila +Philippines Cebu_City +Philippines Santa_Maria +Philippines Roxas +Philippines Pasay +Philippines Laoag +Philippines Legazpi +Philippines Ermita +Philippines Dumaguete +Philippines Bacolod +Philippines Los_Baños +Philippines Silang +Philippines Taguig +Philippines Biñan +Philippines Lipa +Philippines Zamboanga_City +Philippines Intramuros +Philippines Malate +Philippines Dagupan +Philippines Angeles +Philippines Makati +Philippines Tacloban +Philippines Cagayan_de_Oro +Philippines Cavite_City +Philippines Quezon_City +Philippines Sampaloc +Philippines Bislig +Philippines Tagbilaran +Philippines Santa_Rosa +Philippines San_Miguel +Philippines Cabanatuan +Philippines Malolos +Philippines Naga +Russia Tula +Russia Rostov +Russia Vladivostok +Russia Zhukovsky +Russia Yekaterinburg +Russia Perm +Russia Yaroslavl +Russia Krasnodar +Russia Biysk +Russia Barnaul +Russia Kursk +Russia Saransk +Russia Saint_Petersburg +Russia Moscow +Russia Kemerovo_Oblast +Russia Omsk +Russia Bishkek +Russia Magnitogorsk +Russia Murmansk +Russia Tomsk +Russia Monino +Russia Kazan +Russia Ulyanovsk +Russia Ivanovo_Oblast +Russia Vologda +Russia Pereslavl-Zalessky +Russia Krasnoyarsk +Russia Voronezh +Russia Ulan-Ude +Russia Chita +Russia Skolkovo +Russia Saratov +Russia Izhevsk +Russia Moscow_International_Business_Center +Russia Belgorod +Russia Ufa +Russia Volgograd +Russia Zaoksky +Russia Gorno-Altaysk +Russia Irkutsk +United_States New_Haven +United_States Nashville +United_States San_Francisco +United_States Chicago +United_States Lubbock +United_States New_York_City +United_States Gainesville +United_States Philadelphia +United_States Richmond +United_States New_York +United_States Columbus +United_States Indianapolis +United_States Columbia +United_States Phoenix +United_States Notre_Damena +United_States Pittsburgh +United_States Los_Angeles +United_States Ann_Arbor +United_States San_Diego +United_States Berkeley +United_States Atlanta +United_States 
Jacksonville +United_States Orlando +United_States San_Antonio +United_States Milwaukee +United_States Cincinnati +United_States Fayetteville +United_States Austin +United_States Baltimore +United_States Houston +United_States Cambridge +United_States Tallahassee +United_States Boston +United_States Washington +United_States Minneapolis +United_States New_Orleans +United_States Cleveland +United_States St._Louis +United_States Portland +United_States Ithaca +Vietnam Tuyên_Quang +Vietnam Quảng_Ngãi +Vietnam Quy_Nhơn +Vietnam Thái_Nguyên +Vietnam Hanoi +Vietnam Nha_Trang +Vietnam Đông_Hà +Vietnam Đồng_Hới +Vietnam Bắc_Giang +Vietnam Bến +Vietnam Da_Nang +Vietnam Nghe_An_province +Vietnam Long_Xuyen +Vietnam Biên_Hòa +Vietnam Thái_Bình +Vietnam Mỹ_Tho +Vietnam Sơn_La +Vietnam Phan_Rang–Tháp_Chàm +Vietnam Móng_Cái +Vietnam Phan_Thiết +Vietnam Thanh_Hóa +Vietnam Điện_Biên_Phủ +Vietnam Trà_Vinh +Vietnam Phủ_Lý +Vietnam Tuy_Hòa +Vietnam Hung_Yen_province +Vietnam Tân_An +Vietnam Vietnam +Vietnam Sóc_Trăng +Vietnam Cần_Thơ +Vietnam Huế +Vietnam Tam_Kỳ +Vietnam Gia_lam +Vietnam Rạch_Giá +Vietnam Cua_Lo +Vietnam Bạc_Liêu +Vietnam Ho_Chi_Minh_City +Vietnam Bà_Rịa +Vietnam Đà_Lạt +Vietnam Pleiku +Afghanistan Puli_Khumri +Afghanistan Lashkar_Gah +Afghanistan Jalalabad +Afghanistan Karachi +Afghanistan Asadabad +Afghanistan Kabul +Afghanistan Kandahar +Afghanistan Gardēz +Afghanistan Herat +Afghanistan Khost +Algeria Algiers +Algeria Batna +Algeria Tiaret +Algeria El_Harrach +Algeria Guelma +Algeria Bab_Ezzouar +Algeria Chief +Algeria Annaba +Algeria El_Oued +Algeria Oran +Argentina Córdoba +Argentina Mendoza +Argentina Buenos_Aires +Argentina Various +Argentina Posadas +Argentina San_Martín +Argentina Corrientes +Argentina Presidencia_Roque_Sáenz_Peña +Argentina Rosario +Argentina San_Miguel_de_Tucumán +Australia Geelong +Australia Sydney +Australia Canberra +Australia Brisbane +Australia Melbourne +Australia Perth +Australia Darlington +Australia Camperdown +Australia 
Parkville +Australia Adelaide +Belarus Vitebsk +Belarus Baranovichi +Belarus Bobruysk +Belarus Mogilev +Belarus Gomel +Belarus Borisov +Belarus Minsk +Belarus Brest +Belarus Grodno +Belarus Barysaw +Belgium Liège +Belgium Leuven +Belgium Paris +Belgium Tonbridge +Belgium Antwerp +Belgium Ottignies-Louvain-la-Neuve +Belgium Canterbury +Belgium Medway +Belgium Ghent +Belgium Brussels +Burma Taungoo_Township +Burma Sittwe_District +Burma Lanmadaw_Township +Burma Mayangone +Burma Insein_Township +Burma Taungoo +Burma Yangon +Burma Loikaw +Burma Pyin_U_Lwin +Burma Meiktila +Canada Toronto +Canada Fredericton +Canada Edmonton +Canada Ottawa +Canada London +Canada Winnipeg +Canada Hamilton +Canada Saskatoon +Canada Vancouver +Canada Montreal +Cambodia Kampong_Cham +Cambodia Siem_Reap +Cambodia Kampong_Speu +Cambodia Ta_Khmau +Cambodia Pray_Veng +Cambodia Battambang +Cambodia Borey_Seang_Nam +Cambodia Phnom_Penh +Cambodia Sihanoukville +Cambodia Pursat +Cameroon Mokolo +Cameroon Baffoussam +Cameroon Garoua +Cameroon Bangangté +Cameroon Maroua +Cameroon Buea +Cameroon Kumba +Cameroon Yaoundé +Cameroon Bamenda +Cameroon Douala +Chile Valdivia +Chile Santiago +Chile Antofagasta +Chile Iquique +Chile Valparaíso +Chile Temuco +Chile Viña_del_Mar +Chile Puerto_Montt +Chile Osorno +Chile Concepción +Colombia Bogotá +Colombia Cartagena +Colombia Pamplona +Colombia Manizales +Colombia Armenia +Colombia Bucaramanga +Colombia Cali +Colombia Barranquilla +Colombia Tunja +Colombia Medellín +Cuba Havana +Cuba Marianao +Cuba Santiago_de_Cuba +Cuba Jagüey_Grande +Cuba Cienfuegos +Cuba Matanzas +Cuba Pinar_del_Río +Cuba Ciego_de_Ávila +Cuba Santa_Clara +Cuba Camagüey +Czech_Republic Ostrava +Czech_Republic Liberec +Czech_Republic Prague +Czech_Republic Mladá_Boleslav +Czech_Republic České_Budějovice +Czech_Republic Ústí_nad_Labem +Czech_Republic Hradec_Králové +Czech_Republic Brno +Czech_Republic Moravia +Czech_Republic Olomouc +Democratic_Republic_of_the_Congo Kinshasa 
+Democratic_Republic_of_the_Congo Bandundu +Democratic_Republic_of_the_Congo Baraka +Democratic_Republic_of_the_Congo Boma +Democratic_Republic_of_the_Congo Bukavu +Democratic_Republic_of_the_Congo Goma +Democratic_Republic_of_the_Congo Kisangani +Democratic_Republic_of_the_Congo Kananga +Democratic_Republic_of_the_Congo Lubumbashi +Democratic_Republic_of_the_Congo Bas-Congo +Egypt Alexandria +Egypt Monufia_Governorate +Egypt Cairo +Egypt Beni_Suef +Egypt Giza +Egypt El_Shorouk +Egypt New_Cairo +Egypt Ramadan_City +Egypt Bilbeis +Egypt Sohag +England Southampton +England Manchester +England Leicester +England Newcastle_upon_Tyne +England London +England Durham +England Birmingham +England York +England Sheffield +England Leeds +Ethiopia Dire_Dawa +Ethiopia Debre_Marqos +Ethiopia Nekemte +Ethiopia Awasa +Ethiopia Mek'ele +Ethiopia Bahir_Dar +Ethiopia Addis_Ababa +Ethiopia Mettu +Ethiopia Adama +Ethiopia Jimma +France Lyon +France Toulouse +France Montpellier +France Lille +France Paris +France Aix-en-Provence +France Évry +France Nancy +France Grenoble +France Nantes +Greece Thessaloniki +Greece Kozani +Greece Patras +Greece Kalamata +Greece Athens +Greece Trikala +Greece Crete +Greece Karditsa +Greece Piraeus +Greece Larissa +Hungary Győr +Hungary Budapest +Hungary Szeged +Hungary Esztergom +Hungary Salgótarján +Hungary Zalaegerszeg +Hungary Visonta +Hungary Debrecen +Hungary Veszprém +Hungary Piliscsaba +Iran Shiraz +Iran Shahrekord +Iran Babol +Iran Mashhad +Iran Zanjan +Iran Qazvin +Iran Tabriz +Iran Isfahan +Iran Tehran +Iran Qom +Italy Venice +Italy Pisa +Italy Naples +Italy Trieste +Italy Milan +Italy Rome +Italy Pavia +Italy Florence +Italy Turin +Italy Brescia +Kazakhstan Astana +Kazakhstan Aktobe +Kazakhstan Karagandy +Kazakhstan Oskemen +Kazakhstan Pavlodar +Kazakhstan Taraz +Kazakhstan Semey +Kazakhstan Shymkent +Kazakhstan Almaty +Kazakhstan Oral +Kenya Karachi +Kenya Machakos +Kenya Eldoret +Kenya Kikuyu +Kenya Kitengela +Kenya Nakuru +Kenya Kitale 
+Kenya Thika +Kenya Nairobi +Kenya Nyeri +Madagascar Ambovombe +Madagascar Amparafaravola +Madagascar Mahajanga +Madagascar Antsirabe +Madagascar Antananarivo +Madagascar Toamasina +Madagascar Fianarantosa +Madagascar Antanifotsy +Madagascar Toliara +Madagascar Antsiranana +Malaysia Cyberjaya +Malaysia Kuching +Malaysia Nilai +Malaysia George_Town +Malaysia Penang +Malaysia Selangor +Malaysia Wilayah_Persekutuan_Kuala_Lumpur +Malaysia Kuala_Lumpur +Malaysia Terengganu +Malaysia Shah_Alam +Moldova Codru +Moldova Ungheni +Moldova Orhei +Moldova Comrat +Moldova Cahul +Moldova Tiraspol +Moldova Chişinău +Moldova Balti +Moldova Bender +Moldova Soroca +Morocco Oujda +Morocco Casablanca +Morocco Marrakech +Morocco Rabat +Morocco Sale +Morocco Ifrane +Morocco Agadir +Morocco Fes +Morocco Meknes +Morocco Tangier-Tetouan +Nepal Nala +Nepal Kirtipur +Nepal Balkumari +Nepal Lumbini +Nepal Pokhara +Nepal Dharan +Nepal Gwarko +Nepal Lalitpur +Nepal Kathmandu +Nepal Rajbiraj +Netherlands Groningen +Netherlands The_Hague +Netherlands Heerlen +Netherlands Leiden +Netherlands Maastricht +Netherlands Eindhoven +Netherlands Utrecht +Netherlands Amsterdam +Netherlands Tilburg +Netherlands Rotterdam +Niger Tessaoua +Niger Arlit +Niger Abidjan +Niger Niamey +Niger Zinder +Niger Tahoua +Niger Birni_Nkonni +Niger Agadez +Niger Maradi +Niger Dosso +Peru Trujillo +Peru Distrito_de_Bellavista +Peru Chorrillos_District +Peru Arequipa +Peru Jesús_María +Peru Chosica,Lima +Peru Huancayo +Peru Callao +Peru Lima +Peru La_Punta +Poland Gdańsk +Poland Szczecin +Poland Wrocław +Poland Radom +Poland Łódź +Poland Katowice +Poland Lublin +Poland Kraków +Poland Poznań +Poland Warsaw +Portugal Porto +Portugal Steenokkerzeel +Portugal Faro +Portugal Almada +Portugal Braga +Portugal Coimbra +Portugal Lisbon +Portugal Évora +Portugal Mesa +Portugal Funchal +Taiwan Taichung +Taiwan Yongkang_District +Taiwan Zhongli +Taiwan Yunlin_County +Taiwan Hsinchu +Taiwan Kaohsiung +Taiwan Taiwan +Taiwan New_Taipei 
+Taiwan Taipei +Taiwan Tainan +Romania Bucharest +Romania Cluj-Napoca +Romania Constanța +Romania Iași +Romania Galați +Romania Brașov +Romania Arad +Romania Oradea +Romania Târgoviște +Romania Timișoara +Senegal Kolda +Senegal Saint-Louis +Senegal Abidjan +Senegal Thies +Senegal Louga +Senegal Touba +Senegal Dakar +Senegal Diourbel +Senegal Tambacounda +Senegal Rufisque +South_Africa Cape_Town +South_Africa Durban +South_Africa Bloemfontein +South_Africa Saldanha +South_Africa eMalahleni +South_Africa Mthatha +South_Africa Mbabane +South_Africa Johannesburg +South_Africa Bellville +South_Africa Pretoria +South_Korea Seoul +South_Korea Seongbuk-gu +South_Korea Suwon +South_Korea Gwangju +South_Korea Daejeon +South_Korea Pohang +South_Korea Busan +South_Korea Andong +South_Korea Yongin +South_Korea Incheon +Spain Salamanca +Spain Madrid +Spain Barcelona +Spain Seville +Spain Valladolid +Spain Santander +Spain Vigo +Spain San_Sebastián +Spain Valencia +Spain Bilbao +Sri_Lanka Kelaniya +Sri_Lanka Elpitiya +Sri_Lanka Matara +Sri_Lanka Nugegoda +Sri_Lanka Colombo +Sri_Lanka Kandy +Sri_Lanka Peradeniya +Sri_Lanka Ratmalana_Airport +Sri_Lanka Malabe +Sri_Lanka Batalanda +Sweden Rovaniemi +Sweden Malmö +Sweden Uppsala +Sweden Umeå +Sweden Borås +Sweden Jönköping +Sweden Stockholm +Sweden Lund +Sweden Örebro +Sweden Gothenburg +Tanzania Karachi +Tanzania Mwanza +Tanzania Arusha +Tanzania Morogoro +Tanzania Mzumbe_morogoro +Tanzania Dar_es_Salaam +Tanzania Mbeya +Tanzania Unguja +Tanzania Dodoma +Tanzania Zanzibar +Thailand Nakhon_Pathom_Province +Thailand Bangkok +Thailand Pathum_Thani_Province +Thailand Pathum_Thani +Thailand Lampang +Thailand Chiang_Mai +Thailand Klong_Luang +Thailand Hat_Yai +Thailand Songkhla +Thailand Narathiwat +Tunisia La_Marsa +Tunisia Bizerte +Tunisia Etadhamen +Tunisia Sfax +Tunisia Gabes +Tunisia Monastir +Tunisia Sousse +Tunisia Kairouan +Tunisia Tunis +Tunisia El_Mourouj +Turkey Mersin +Turkey Istanbul +Turkey Çankaya +Turkey Ankara +Turkey 
Bursa +Turkey Gaziantep +Turkey Eskişehir +Turkey Gebze +Turkey Kadıköy +Turkey Izmir +Ukraine Kiev +Ukraine Mykolaiv +Ukraine Dnipropetrovsk +Ukraine Kharkiv +Ukraine Uzhhorod +Ukraine Lviv +Ukraine Donetsk +Ukraine Luhansk +Ukraine Chernivtsi +Ukraine Odessa +United_Kingdom Manchester +United_Kingdom Cambridge +United_Kingdom Coventry +United_Kingdom Liverpool +United_Kingdom North_Wales +United_Kingdom Birmingham +United_Kingdom Canterbury +United_Kingdom Bristol +United_Kingdom London +United_Kingdom Glasgow +Venezuela Caracas +Venezuela Barquisimeto +Venezuela Nueva_Esparta +Venezuela Anzoátegui +Venezuela Maracaibo +Venezuela Monagas +Venezuela Valencia +Venezuela Bolívar +Venezuela Mérida +Venezuela Sucre +Yemen Al_Hudaydah +Yemen Zinjibar +Yemen Dhamar +Yemen Ibb +Yemen Hadhramaut +Yemen Ta'izz +Yemen Sayyan +Yemen Aden +Yemen Sana'a +Yemen Al_Mukalla +Zambia Luanshya +Zambia Livingstone +Zambia Monze +Zambia Ndola +Zambia Kasama +Zambia Kabwe +Zambia Mufulira +Zambia Kitwe +Zambia Lusaka +Zambia Chingola diff --git a/ldbc_socialnet_dbgen/src/main/resources/dictionaries/dicLocations.txt b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/dicLocations.txt new file mode 100644 index 000000000..d61a77c13 --- /dev/null +++ b/ldbc_socialnet_dbgen/src/main/resources/dictionaries/dicLocations.txt @@ -0,0 +1,111 @@ +Asia India 21 77 1000000000 0.146305779078 +Asia China 35 105 1000000000 0.292611558157 +Africa Angola -9 13 5000000 0.293343087052 +Europe Austria 48 16 5000000 0.294074615947 +Asia Azerbaijan 40 48 5000000 0.294806144843 +South_America Bolivia -19 -65 5000000 0.295537673738 +Europe Bosnia_and_Herzegovina 44 18 5000000 0.296269202634 +Europe Bulgaria 43 23 5000000 0.297000731529 +Africa Chad 12 15 5000000 0.297732260424 +Europe Croatia 46 16 5000000 0.29846378932 +Europe Denmark 56 56 5000000 0.299195318215 +North_America Dominican_Republic 19 -71 5000000 0.29992684711 +North_America El_Salvador 14 -89 5000000 0.300658376006 +Europe Estonia 59 25 
5000000 0.301389904901 +Europe Finland 60 25 5000000 0.302121433797 +North_America Honduras 14 -87 5000000 0.302852962692 +Asia Hong_Kong 22 114 5000000 0.303584491587 +Asia Israel 31 35 5000000 0.304316020483 +Asia Jordan 32 36 5000000 0.305047549378 +Asia Laos 18 103 5000000 0.305779078274 +Europe Latvia 57 24 5000000 0.306510607169 +Africa Liberia 6 -11 5000000 0.307242136064 +Africa Libya 27 13 5000000 0.30797366496 +Europe Lithuania 55 25 5000000 0.308705193855 +Europe Malta 36 15 5000000 0.309436722751 +Africa Mauritania 18 -16 5000000 0.310168251646 +Africa Mauritius -20 58 5000000 0.310899780541 +Asia Mongolia 48 107 5000000 0.311631309437 +Africa Namibia -23 17 5000000 0.312362838332 +Australia New_Zealand -41 174 5000000 0.313094367228 +North_America Nicaragua 12 -86 5000000 0.313825896123 +Europe Northern_Ireland 55 -6 5000000 0.314557425018 +Europe Norway 60 11 5000000 0.315288953914 +Asia Oman 24 59 5000000 0.316020482809 +North_America Panama 9 -80 5000000 0.316752011704 +Australia Papua_New_Guinea -9 147 5000000 0.3174835406 +North_America Puerto_Rico 18 -66 5000000 0.318215069495 +Europe Republic_of_Ireland 53 -6 5000000 0.318946598391 +Europe Republic_of_Macedonia 42 21 5000000 0.319678127286 +Africa Rwanda -2 30 5000000 0.320409656181 +Europe Scotland 56 -3 5000000 0.321141185077 +Asia Singapore 1 104 5000000 0.321872713972 +Europe Slovakia 48 17 5000000 0.322604242868 +Europe Slovenia 46 15 5000000 0.323335771763 +Africa Swaziland -26 31 5000000 0.324067300658 +Europe Switzerland 47 7 5000000 0.324798829554 +Asia Tajikistan 39 69 5000000 0.325530358449 +South_America Uruguay -35 -56 5000000 0.326261887345 +Europe Wales 51 -3 5000000 0.32699341624 +South_America Brazil -16 -48 200000000 0.356254572056 +Europe Germany 53 13 200000000 0.385515727871 +Asia Indonesia -6 107 200000000 0.414776883687 +Asia Japan 36 140 200000000 0.444038039503 +North_America Mexico 19 -99 200000000 0.473299195318 +Asia Pakistan 34 73 200000000 0.502560351134 +Asia 
Philippines 15 121 200000000 0.53182150695 +Europe Russia 56 38 200000000 0.561082662765 +North_America United_States 39 -77 200000000 0.590343818581 +Asia Vietnam 21 106 200000000 0.619604974396 +Asia Afghanistan 35 69 50000000 0.62692026335 +Africa Algeria 37 3 50000000 0.634235552304 +South_America Argentina -35 -58 50000000 0.641550841258 +Australia Australia -35 149 50000000 0.648866130212 +Europe Belarus 54 28 50000000 0.656181419166 +Europe Belgium 51 4 50000000 0.66349670812 +Asia Burma 20 96 50000000 0.670811997074 +North_America Canada 45 -76 50000000 0.678127286028 +Asia Cambodia 12 105 50000000 0.685442574982 +Africa Cameroon 4 12 50000000 0.692757863936 +South_America Chile -33 -71 50000000 0.70007315289 +South_America Colombia 5 -74 50000000 0.707388441843 +North_America Cuba 23 -82 50000000 0.714703730797 +Europe Czech_Republic 50 14 50000000 0.722019019751 +Africa Democratic_Republic_of_the_Congo -4 15 50000000 0.729334308705 +Africa Egypt 30 31 50000000 0.736649597659 +Europe England 52 0 50000000 0.743964886613 +Africa Ethiopia 9 39 50000000 0.751280175567 +Europe France 47 49 50000000 0.758595464521 +Europe Greece 38 22 50000000 0.765910753475 +Europe Hungary 47 19 50000000 0.773226042429 +Asia Iran 32 51 50000000 0.780541331383 +Europe Italy 42 12 50000000 0.787856620337 +Asia Kazakhstan 43 72 50000000 0.79517190929 +Africa Kenya -1 37 50000000 0.802487198244 +Africa Madagascar -19 48 50000000 0.809802487198 +Asia Malaysia 3 102 50000000 0.817117776152 +Europe Moldova 47 29 50000000 0.824433065106 +Africa Morocco 34 -7 50000000 0.83174835406 +Asia Nepal 27 85 50000000 0.839063643014 +Europe Netherlands 52 6 50000000 0.846378931968 +Africa Niger 14 2 50000000 0.853694220922 +South_America Peru -12 -77 50000000 0.861009509876 +Europe Poland 52 21 50000000 0.86832479883 +Europe Portugal 39 -9 50000000 0.875640087783 +Asia Taiwan 25 120 50000000 0.882955376737 +Europe Romania 44 26 50000000 0.890270665691 +Africa Senegal 15 -17 50000000 
0.897585954645 +Africa South_Africa -29 25 50000000 0.904901243599 +Asia South_Korea 38 127 50000000 0.912216532553 +Europe Spain 40 -4 50000000 0.919531821507 +Africa Sri_Lanka 7 80 50000000 0.926847110461 +Europe Sweden 59 18 50000000 0.934162399415 +Africa Tanzania -6 35 50000000 0.941477688369 +Asia Thailand 14 100 50000000 0.948792977323 +Africa Tunisia 37 10 50000000 0.956108266277 +Asia Turkey 40 33 50000000 0.96342355523 +Europe Ukraine 50 31 50000000 0.970738844184 +Europe United_Kingdom 52 0 50000000 0.978054133138 +South_America Venezuela 11 -67 50000000 0.985369422092 +Asia Yemen 15 44 50000000 0.992684711046 +Africa Zambia -15 28 50000000 1.0